QUICK TEST

want to test if i can import the procrustes info from morphoJ to do the PCA here instead.

got the procrustes info to import, but couldn’t get the multivariate to work here. So, I’ll just import the regression residuals and work with that

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(curl)
## Using libcurl 8.3.0 with Schannel
## 
## Attaching package: 'curl'
## 
## The following object is masked from 'package:readr':
## 
##     parse_date
test.df <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/MV%20regression%2C%20results.txt")

test.df <- read.table(test.df, header = TRUE, sep = "\t")

##### PCA

PCA.test <- prcomp(test.df[, 7:38], center = TRUE, scale. = FALSE) #false uses covariance matrix like in MorphoJ; scale=true only useful if variables are measured on different scales

summary(PCA.test)
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5     PC6      PC7
## Standard deviation     0.02836 0.01986 0.01969 0.01378 0.01248 0.01121 0.009914
## Proportion of Variance 0.30299 0.14861 0.14601 0.07156 0.05867 0.04729 0.037020
## Cumulative Proportion  0.30299 0.45160 0.59760 0.66916 0.72783 0.77512 0.812150
##                             PC8      PC9    PC10    PC11     PC12     PC13
## Standard deviation     0.008759 0.008336 0.00762 0.00651 0.006144 0.005412
## Proportion of Variance 0.028900 0.026170 0.02187 0.01596 0.014220 0.011030
## Cumulative Proportion  0.841040 0.867210 0.88908 0.90504 0.919260 0.930290
##                            PC14     PC15     PC16     PC17     PC18     PC19
## Standard deviation     0.005218 0.005002 0.004721 0.004455 0.003892 0.003739
## Proportion of Variance 0.010260 0.009420 0.008390 0.007470 0.005710 0.005270
## Cumulative Proportion  0.940550 0.949980 0.958370 0.965850 0.971550 0.976820
##                           PC20     PC21    PC22     PC23     PC24     PC25
## Standard deviation     0.00333 0.003231 0.00321 0.002557 0.002468 0.002408
## Proportion of Variance 0.00418 0.003930 0.00388 0.002460 0.002290 0.002180
## Cumulative Proportion  0.98099 0.984930 0.98881 0.991270 0.993560 0.995750
##                            PC26     PC27    PC28      PC29      PC30     PC31
## Standard deviation     0.002136 0.002034 0.00161 2.896e-16 2.126e-16 2.73e-17
## Proportion of Variance 0.001720 0.001560 0.00098 0.000e+00 0.000e+00 0.00e+00
## Cumulative Proportion  0.997470 0.999020 1.00000 1.000e+00 1.000e+00 1.00e+00
##                             PC32
## Standard deviation     1.825e-17
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
(eig.val.test <- get_eigenvalue(PCA.test))
##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  8.044309e-04     3.029912e+01                    30.29912
## Dim.2  3.945433e-04     1.486058e+01                    45.15971
## Dim.3  3.876418e-04     1.460064e+01                    59.76035
## Dim.4  1.899869e-04     7.155909e+00                    66.91626
## Dim.5  1.557587e-04     5.866694e+00                    72.78295
## Dim.6  1.255665e-04     4.729499e+00                    77.51245
## Dim.7  9.829059e-05     3.702143e+00                    81.21459
## Dim.8  7.672005e-05     2.889683e+00                    84.10427
## Dim.9  6.948059e-05     2.617006e+00                    86.72128
## Dim.10 5.806752e-05     2.187130e+00                    88.90841
## Dim.11 4.237375e-05     1.596019e+00                    90.50443
## Dim.12 3.775063e-05     1.421888e+00                    91.92632
## Dim.13 2.928853e-05     1.103161e+00                    93.02948
## Dim.14 2.723190e-05     1.025697e+00                    94.05518
## Dim.15 2.502220e-05     9.424683e-01                    94.99764
## Dim.16 2.228727e-05     8.394564e-01                    95.83710
## Dim.17 1.984478e-05     7.474592e-01                    96.58456
## Dim.18 1.514869e-05     5.705797e-01                    97.15514
## Dim.19 1.398100e-05     5.265985e-01                    97.68174
## Dim.20 1.108613e-05     4.175621e-01                    98.09930
## Dim.21 1.044038e-05     3.932397e-01                    98.49254
## Dim.22 1.030651e-05     3.881977e-01                    98.88074
## Dim.23 6.537065e-06     2.462204e-01                    99.12696
## Dim.24 6.089258e-06     2.293536e-01                    99.35631
## Dim.25 5.797141e-06     2.183509e-01                    99.57466
## Dim.26 4.563074e-06     1.718695e-01                    99.74653
## Dim.27 4.138116e-06     1.558633e-01                    99.90240
## Dim.28 2.591368e-06     9.760463e-02                   100.00000
## Dim.29 8.389200e-32     3.159816e-27                   100.00000
## Dim.30 4.520650e-32     1.702716e-27                   100.00000
## Dim.31 7.455525e-34     2.808145e-29                   100.00000
## Dim.32 3.331721e-34     1.254902e-29                   100.00000
ind.test <- get_pca_ind(PCA.test)
head(ind.test$coord[,1:4])
##         Dim.1         Dim.2       Dim.3         Dim.4
## 1  0.01340669  0.0078857358 0.014029153 -0.0106578735
## 2  0.01342228 -0.0223168798 0.022468552 -0.0050787697
## 3 -0.02748104 -0.0096209523 0.014553590  0.0091966388
## 4 -0.02001009  0.0003470414 0.008558898  0.0037383369
## 5 -0.03116908  0.0111344766 0.013840877  0.0051050439
## 6 -0.01375579  0.0166512862 0.004231201 -0.0008373614
test.df <- cbind(test.df, ind.test$coord[,1:4])

(loadings <- PCA.test$rotation[, 1:5])
##                     PC1          PC2         PC3          PC4          PC5
## RegResid1   0.135008248 -0.110107210  0.16716573 -0.274849465  0.052275630
## RegResid2   0.155310001 -0.133546894 -0.01466466  0.105705695 -0.190927188
## RegResid3   0.263422372  0.006700256 -0.82407265  0.042422404  0.259518193
## RegResid4   0.148360094  0.156240124 -0.16632659  0.198769756  0.070899948
## RegResid5  -0.670432329 -0.505275356 -0.17727123  0.232038069 -0.095963475
## RegResid6   0.174401325  0.117100951  0.15249628  0.370790715  0.143331415
## RegResid7   0.205954566  0.026521398  0.18463118  0.029099777  0.115944040
## RegResid8   0.029040819 -0.029517028  0.02861223  0.282470957  0.037279697
## RegResid9  -0.079230864  0.183352491 -0.02246982 -0.238692956  0.169871081
## RegResid10  0.100564008  0.002877471 -0.10554812  0.056717141 -0.297849545
## RegResid11 -0.105055841  0.247292942  0.01872475 -0.026959085 -0.001853964
## RegResid12  0.059706631 -0.090181853 -0.12731485 -0.093271461 -0.238773394
## RegResid13 -0.042277217  0.087980640 -0.13963359 -0.340711764 -0.209979248
## RegResid14  0.005249607 -0.188157464 -0.09839363 -0.199721166 -0.055348975
## RegResid15  0.143634501 -0.185964018  0.12708402  0.054968122  0.139445654
## RegResid16 -0.072327608 -0.031638025  0.09710973 -0.025205391  0.369938210
## RegResid17  0.188160260 -0.387448897  0.13142518 -0.018493539  0.196717211
## RegResid18 -0.158585258  0.035307617  0.06818033 -0.078120936  0.345505236
## RegResid19 -0.003795688 -0.134926056  0.06605121  0.099567189  0.097667438
## RegResid20 -0.363086378  0.352140160 -0.05579576 -0.145489784  0.136992213
## RegResid21 -0.145276979  0.213625337  0.09734773  0.425220615  0.077901415
## RegResid22 -0.044727402  0.001069053  0.00684746 -0.188717407  0.008350602
## RegResid23 -0.030163021  0.231249783  0.04663237 -0.079076106  0.016419621
## RegResid24 -0.011025942 -0.098728303  0.06183703 -0.049875094  0.024288123
## RegResid25  0.023045499  0.169496081  0.02669218  0.143853048 -0.326092317
## RegResid26 -0.122714633  0.038725202  0.01041427 -0.024921153 -0.034698824
## RegResid27 -0.038783243  0.209659503  0.01854588  0.157356674 -0.317146993
## RegResid28 -0.119849240  0.035390979  0.06277723 -0.168120638 -0.019423858
## RegResid29  0.074531941 -0.035206773  0.15266214 -0.174676652 -0.067499447
## RegResid30  0.118234295 -0.083421229  0.04286607  0.006064492 -0.110964461
## RegResid31  0.081257793 -0.016950123  0.12648494 -0.031066332 -0.107224840
## RegResid32  0.101449680 -0.083660762  0.03690298 -0.047075726 -0.188599199
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

##### POST PCA TESTS

lat.p <- test.df[test.df$Species == "p.latipinna",]


form.p <- test.df[test.df$Species == "p.formosa",]

(pc1 <- t.test(lat.p$Dim.1, form.p$Dim.1, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.1 and form.p$Dim.1
## t = 24.956, df = 303.49, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03970850 0.04650662
## sample estimates:
##   mean of x   mean of y 
##  0.02497059 -0.01813697
(pc2 <- t.test(lat.p$Dim.2, form.p$Dim.2, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.2 and form.p$Dim.2
## t = -0.48969, df = 277.42, p-value = 0.6247
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005099263  0.003067681
## sample estimates:
##   mean of x   mean of y 
## 0.001469468 0.002485259
(pc3 <- t.test(lat.p$Dim.3, form.p$Dim.3, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.3 and form.p$Dim.3
## t = 4.4683, df = 303.32, p-value = 1.115e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.005281476 0.013594355
## sample estimates:
##    mean of x    mean of y 
##  0.005540339 -0.003897577
(pc1V <- leveneTest(Dim.1~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   2  0.3452 0.7084
##       337
(pc2V <- leveneTest(Dim.2~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   2  7.5716 0.0006074 ***
##       337                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(pc3V <- leveneTest(Dim.3~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   2  4.3582 0.01353 *
##       337                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##### PLOTS

library(AMR) 
library(ggplot2)
library(ggfortify)
library(ggbiplot)
## 
## Attaching package: 'ggbiplot'
## 
## The following object is masked from 'package:ggfortify':
## 
##     ggbiplot
wes.yel <- "#E1AF00"
wes.blu <- "#78B7C5"

plot1<- autoplot(PCA.test, data = test.df, colour="Species", loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+scale_color_manual(values=c("#E1AF00","#78B7C5", "#9986A5"))+ scale_fill_manual(values=c("#E1AF00","#78B7C5", "#9986A5"))+ ggtitle("PCA Plot of Morphology traits") + theme_classic() 

plot1

plot2<- autoplot(PCA.test, x=2, y=3, data = test.df, colour='Species', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+scale_color_manual(values=c("#E1AF00","#78B7C5", "#9986A5"))+ scale_fill_manual(values=c("#E1AF00","#78B7C5", "#9986A5"))+ ggtitle("PCA Plot of Morphology traits") + theme_classic()
plot2

Experiment overview

Purpose: Briefly outline the purpose of your experiment.

Hypothesis: Outline the hypotheses if applicable.

Predictions: - Prediction 1 (if applicable).

  • How it relates to hypothesis: describe how this prediction relates to the hypothesis

Data: Briefly describe the type of data collected.


Data

To get digital distances: The TPS coordinates were in a long format, and I thought it would be easiest to locate the values needed to insert into the euclidean distance formula with a wide format. So, I first cleaned up the original TPS file–I removed all curve data, and unnecessary rows (ie. LM=16, image=) to be left with a column of IDs, a column of landmark labels, and two columns for the coordinates.

Importing that into R, I then used the wider fxn to create one column for IDs, 16 columns for the X coordinates of all the landmarks, and 16 columns for all the y coordinates of all the landmarks (originally wanted it to be a coordinates column, so there would be two rows for each specimen to minimize column numbers, but whatever, this works). I separated this into two dataframes, one with just the X coords and one with just the Y, that way it would be easier to call.

I calculated the euclidean distance (formula: sqrt(sq(X\(LM2-X\)LM1)+sq(Y\(LM2-Y\)LM1))) for most of the same measurements (only missing total length, and head width, as this was not possible in the digital photos). For head depth, wasn’t sure if 2-> 11 or 2->12 was closer to what I measured by hand; same for body depth, wasn’t sure if it was 3->10 or 3->11. I included both.

finished all of the calculations and added the values into a dataframe with the IDs from the original tps file. Exported it to excel

Once in excel, I realized that the values I had were in pixels (I think?) rather than mm, so I needed to find a way to convert in order to compare to the hand measurements. I found out that the scale factor (included in the original tps file) is mm/pixel, so I added this to the excel, and multiplied it to each value to get the mm. However, it was a bit off (eg 4.5 rather than 45) so I multiplied it by 10 to get to a number that resembled the hand measurements. Not 100% sure if this was correct, or why I would need to multiply by 10, but going with it for now.

library(tidyverse)
library(curl)

raw <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/dig-dist.csv")

raw <- read.csv(raw, header = TRUE, sep = ",", stringsAsFactors = TRUE)

raw1 <- raw %>%
  pivot_wider(names_from = LABELS, values_from = c(X.COR, Y.COR))


x <- raw1[ , 1:17]
y <- raw1[, c(1, 18:33)]

HL.dig <- sqrt(((x$X.COR_LM2 -x$X.COR_LM1)^2 + (y$Y.COR_LM2-y$Y.COR_LM1)^2))
SL.dig <- sqrt(((x$X.COR_LM6 -x$X.COR_LM1)^2 + (y$Y.COR_LM6-y$Y.COR_LM1)^2))
PreDL.dig <- sqrt(((x$X.COR_LM3 -x$X.COR_LM1)^2 + (y$Y.COR_LM3-y$Y.COR_LM1)^2))
DbL.dig <- sqrt(((x$X.COR_LM4 -x$X.COR_LM3)^2 + (y$Y.COR_LM4-y$Y.COR_LM3)^2))
CPD.dig <- sqrt(((x$X.COR_LM7 -x$X.COR_LM5)^2 + (y$Y.COR_LM7-y$Y.COR_LM5)^2))
HD1.dig <- sqrt(((x$X.COR_LM11 -x$X.COR_LM2)^2 + (y$Y.COR_LM11-y$Y.COR_LM2)^2))
HD2.dig <- sqrt(((x$X.COR_LM12 -x$X.COR_LM2)^2 + (y$Y.COR_LM12-y$Y.COR_LM2)^2))
CPL.dig <- sqrt(((x$X.COR_LM8 -x$X.COR_LM6)^2 + (y$Y.COR_LM8-y$Y.COR_LM6)^2))
BD1.dig <- sqrt(((x$X.COR_LM3 -x$X.COR_LM10)^2 + (y$Y.COR_LM3-y$Y.COR_LM10)^2))
BD2.dig <- sqrt(((x$X.COR_LM3 -x$X.COR_LM11)^2 + (y$Y.COR_LM3-y$Y.COR_LM11)^2))
SnL.dig <- sqrt(((x$X.COR_LM15 -x$X.COR_LM1)^2 + (y$Y.COR_LM15-y$Y.COR_LM1)^2))
OL.dig <- sqrt(((x$X.COR_LM16 -x$X.COR_LM15)^2 + (y$Y.COR_LM16-y$Y.COR_LM15)^2))

raw2 <- cbind(raw1[,1], SL.dig, BD1.dig, BD2.dig, CPD.dig, CPL.dig, PreDL.dig, DbL.dig, HL.dig, HD1.dig, HD2.dig, SnL.dig, OL.dig)

library(writexl)

write_xlsx(raw2, 'C:/Users/allis/OneDrive/Desktop/comparison_data2.xlsx')

Performed most analyses in MorphoJ. Analyses here will strictly be for comparing digital vs hand measurements.

library(tidyverse)
library(curl)

raw <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/comparison_data.csv")

raw <- read.csv(raw, header = TRUE, sep = ",", stringsAsFactors = TRUE)

#AD20-084 is missing all manual values, so I will remove to avoid issues with NAs
raw<- raw[raw$ID !="AD20-082", ]

Normality check

I will use Shapiro-wilk, histograms, and QQ plots to determine what traits are normal.

Will separate my raw dataset into the two factors of interest: digital and manual measurements.

library(dplyr)

digital <- raw %>%
  select(ID, SPP, ends_with(".dig"))

manual <- raw %>%
  select(ID, SPP, ends_with(".man"))

Digital

All but OL fail SW test with a slight skew/deviation to the right.

##### Shapiro-Wilk test #####
shapiro.test(digital$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$SL
## W = 0.97473, p-value = 1.165e-05
shapiro.test(digital$BD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$BD1
## W = 0.98196, p-value = 0.0002967
shapiro.test(digital$BD2)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$BD2
## W = 0.9907, p-value = 0.03071
shapiro.test(digital$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$CPD
## W = 0.96472, p-value = 2.6e-07
shapiro.test(digital$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$CPL
## W = 0.95444, p-value = 9.409e-09
shapiro.test(digital$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$PreDL
## W = 0.98017, p-value = 0.0001268
shapiro.test(digital$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$DbL
## W = 0.9864, p-value = 0.002823
shapiro.test(digital$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$HL
## W = 0.95243, p-value = 5.184e-09
shapiro.test(digital$HD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$HD1
## W = 0.9765, p-value = 2.463e-05
shapiro.test(digital$HD2)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$HD2
## W = 0.97148, p-value = 3.143e-06
shapiro.test(digital$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$SnL
## W = 0.97219, p-value = 4.145e-06
shapiro.test(digital$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  digital$OL
## W = 0.99609, p-value = 0.5705
##### Histograms #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

hist(digital$SL, breaks=30)

hist(digital$BD1, breaks=30)

hist(digital$BD2, breaks=30)

hist(digital$CPD, breaks=30)

hist(digital$CPL, breaks=30)

hist(digital$PreDL, breaks=30)

hist(digital$DbL, breaks=30)

hist(digital$HL, breaks=30)

hist(digital$HD1, breaks=30)

hist(digital$HD2, breaks=30)

hist(digital$SnL, breaks=30)

hist(digital$OL, breaks=30)

##### QQ plots #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

qqnorm(digital$SL)
qqline(digital$SL)

qqnorm(digital$BD1)
qqline(digital$BD1)

qqnorm(digital$BD2)
qqline(digital$BD2)

qqnorm(digital$CPD)
qqline(digital$CPD)

qqnorm(digital$CPL)
qqline(digital$CPL)

qqnorm(digital$PreDL)
qqline(digital$PreDL)

qqnorm(digital$DbL)
qqline(digital$DbL)

qqnorm(digital$HL)
qqline(digital$HL)

qqnorm(digital$HD1)
qqline(digital$HD1)

qqnorm(digital$HD2)
qqline(digital$HD2)

qqnorm(digital$SnL)
qqline(digital$SnL)

qqnorm(digital$OL)
qqline(digital$OL)

Manual

All fail the SW test, with a slight skew/deviation to the right.

##### Shapiro-Wilk test #####
shapiro.test(manual$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$SL
## W = 0.98036, p-value = 0.0001385
shapiro.test(manual$BD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$BD1
## W = 0.96494, p-value = 2.806e-07
shapiro.test(manual$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$CPD
## W = 0.97037, p-value = 2.046e-06
shapiro.test(manual$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$CPL
## W = 0.97427, p-value = 9.619e-06
shapiro.test(manual$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$PreDL
## W = 0.98037, p-value = 0.0001391
shapiro.test(manual$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$DbL
## W = 0.98347, p-value = 0.00062
shapiro.test(manual$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$HL
## W = 0.97032, p-value = 2.009e-06
shapiro.test(manual$HD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$HD1
## W = 0.98378, p-value = 0.0007249
shapiro.test(manual$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$SnL
## W = 0.97615, p-value = 2.113e-05
shapiro.test(manual$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual$OL
## W = 0.98828, p-value = 0.007818
##### Histograms #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

hist(manual$SL, breaks=30)

hist(manual$BD1, breaks=30)

hist(manual$CPD, breaks=30)

hist(manual$CPL, breaks=30)

hist(manual$PreDL, breaks=30)

hist(manual$DbL, breaks=30)

hist(manual$HL, breaks=30)

hist(manual$HD1, breaks=30)

hist(manual$SnL, breaks=30)

hist(manual$OL, breaks=30)


##### QQ plots #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

qqnorm(manual$SL)
qqline(manual$SL)

qqnorm(manual$BD1)
qqline(manual$BD1)

qqnorm(manual$CPD)
qqline(manual$CPD)

qqnorm(manual$CPL)
qqline(manual$CPL)

qqnorm(manual$PreDL)
qqline(manual$PreDL)

qqnorm(manual$DbL)
qqline(manual$DbL)

qqnorm(manual$HL)
qqline(manual$HL)

qqnorm(manual$HD1)
qqline(manual$HD1)

qqnorm(manual$SnL)
qqline(manual$SnL)

qqnorm(manual$OL)
qqline(manual$OL)

Log transform & recheck

While some values are still significant, log transformations vastly improve normality (eg p=2e-16 to p=0.002). The histograms and QQ plots look pretty damn normal, so I will stick with log transformed values.

Log transformed data sets

dig_trans <- digital
dig_trans[, c(3:14)] <- log(dig_trans[, c(3:14)])

man_trans <- manual
man_trans[, c(3:12)] <- log(man_trans[, c(3:12)])

Digital

OL was the only one that was normal in raw check, but since it was not normal in manual, and I want to compare dig to man, I will transform the OL here too (don’t want to compare raw to transformed measurements).

##### Shapiro-Wilk test #####
shapiro.test(dig_trans$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$SL
## W = 0.99403, p-value = 0.2055
shapiro.test(dig_trans$BD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$BD1
## W = 0.9938, p-value = 0.1809
shapiro.test(dig_trans$BD2)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$BD2
## W = 0.98978, p-value = 0.0181
shapiro.test(dig_trans$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$CPD
## W = 0.99187, p-value = 0.06017
shapiro.test(dig_trans$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$CPL
## W = 0.99116, p-value = 0.03989
shapiro.test(dig_trans$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$PreDL
## W = 0.99151, p-value = 0.04877
shapiro.test(dig_trans$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$DbL
## W = 0.98425, p-value = 0.0009222
shapiro.test(dig_trans$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$HL
## W = 0.99129, p-value = 0.04319
shapiro.test(dig_trans$HD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$HD1
## W = 0.99687, p-value = 0.758
shapiro.test(dig_trans$HD2)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$HD2
## W = 0.99479, p-value = 0.3068
shapiro.test(dig_trans$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$SnL
## W = 0.99732, p-value = 0.8576
shapiro.test(dig_trans$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  dig_trans$OL
## W = 0.98989, p-value = 0.01936
##### Histograms #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

hist(dig_trans$SL, breaks=30)

hist(dig_trans$BD1, breaks=30)

hist(dig_trans$BD2, breaks=30)

hist(dig_trans$CPD, breaks=30)

hist(dig_trans$CPL, breaks=30)

hist(dig_trans$PreDL, breaks=30)

hist(dig_trans$DbL, breaks=30)

hist(dig_trans$HL, breaks=30)

hist(dig_trans$HD1, breaks=30)

hist(dig_trans$HD2, breaks=30)

hist(dig_trans$SnL, breaks=30)

hist(dig_trans$OL, breaks=30)

##### QQ plots #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

qqnorm(dig_trans$SL)
qqline(dig_trans$SL)

qqnorm(dig_trans$BD1)
qqline(dig_trans$BD1)

qqnorm(dig_trans$BD2)
qqline(dig_trans$BD2)

qqnorm(dig_trans$CPD)
qqline(dig_trans$CPD)

qqnorm(dig_trans$CPL)
qqline(dig_trans$CPL)

qqnorm(dig_trans$PreDL)
qqline(dig_trans$PreDL)

qqnorm(dig_trans$DbL)
qqline(dig_trans$DbL)

qqnorm(dig_trans$HL)
qqline(dig_trans$HL)

qqnorm(dig_trans$HD1)
qqline(dig_trans$HD1)

qqnorm(dig_trans$HD2)
qqline(dig_trans$HD2)

qqnorm(dig_trans$SnL)
qqline(dig_trans$SnL)

qqnorm(dig_trans$OL)
qqline(dig_trans$OL)

Manual

##### Shapiro-Wilk test #####
shapiro.test(man_trans$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$SL
## W = 0.99424, p-value = 0.2297
shapiro.test(man_trans$BD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$BD1
## W = 0.99567, p-value = 0.4747
shapiro.test(man_trans$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$CPD
## W = 0.99598, p-value = 0.5428
shapiro.test(man_trans$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$CPL
## W = 0.98966, p-value = 0.01693
shapiro.test(man_trans$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$PreDL
## W = 0.98692, p-value = 0.003722
shapiro.test(man_trans$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$DbL
## W = 0.98849, p-value = 0.008812
shapiro.test(man_trans$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$HL
## W = 0.99081, p-value = 0.03262
shapiro.test(man_trans$HD1)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$HD1
## W = 0.99264, p-value = 0.09385
shapiro.test(man_trans$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$SnL
## W = 0.99033, p-value = 0.02488
shapiro.test(man_trans$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  man_trans$OL
## W = 0.97981, p-value = 0.0001072
##### Histograms #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

hist(man_trans$SL, breaks=30)

hist(man_trans$BD1, breaks=30)

hist(man_trans$CPD, breaks=30)

hist(man_trans$CPL, breaks=30)

hist(man_trans$PreDL, breaks=30)

hist(man_trans$DbL, breaks=30)

hist(man_trans$HL, breaks=30)

hist(man_trans$HD1, breaks=30)

hist(man_trans$SnL, breaks=30)

hist(man_trans$OL, breaks=30)


##### QQ plots #####

par(mfcol=c(2,2), oma = c(0,0,2,0))

qqnorm(man_trans$SL)
qqline(man_trans$SL)

qqnorm(man_trans$BD1)
qqline(man_trans$BD1)

qqnorm(man_trans$CPD)
qqline(man_trans$CPD)

qqnorm(man_trans$CPL)
qqline(man_trans$CPL)

qqnorm(man_trans$PreDL)
qqline(man_trans$PreDL)

qqnorm(man_trans$DbL)
qqline(man_trans$DbL)

qqnorm(man_trans$HL)
qqline(man_trans$HL)

qqnorm(man_trans$HD1)
qqline(man_trans$HD1)

qqnorm(man_trans$SnL)
qqline(man_trans$SnL)

qqnorm(man_trans$OL)
qqline(man_trans$OL)

I will perform 3 different analyses: ANOVA, Bland-Altman and PCA. These vary in whether they need long or wide formats. Also, the raw data contains columns that are not needed for these analyses (eg location or photo date) so I will remove those to leave just the species, tag ID, and measurements.

Long format

library(tidyr)

#log transform entire data set

raw2 <- raw
raw2[, c(3:24)] <- log(raw2[, c(3:24)])

# this will create a data frame with species, ID, characteristic, method and value
df_long <- raw2 %>%
  pivot_longer(
    cols = ends_with(".dig") | ends_with(".man"),
    names_to = c("characteristic", "method"),
    names_sep = "\\.",
    values_to = "value"
  )

# method and characteristic are initially a character; need to be a factor
df_long$method <- as.factor(df_long$method)
df_long$characteristic <- as.factor(df_long$characteristic)

# one fish (AD20-082) is missing hand measurements; will remove

df_long <- df_long[df_long$ID !="AD20-082", ]


#create data frames that only have one measurement for BD and HD

## BD1 and HD1 for both digital and manual measurements 
df_long2 <- df_long[-grep("BD2", df_long$characteristic),]
df_long2 <- df_long2[-grep("HD2", df_long2$characteristic),]#only contains BD1 & HD1

## BD1/HD1 for manual (since they only have one) and BD2/HD2 for digital
df_long3 <- df_long %>%
    filter(
    # Keep everything for Hand measurements
    method == "man" |
    # Keep all digital measurements except BD1 and HD1
    !(method == "dig" & characteristic %in% c("BD1", "HD1"))
  )

#to ensure comparisons between BD2 and BD1 in this new data frame, will combine characteristic names to just BD or HD

df_long3<- df_long3 %>%
  mutate(
    characteristic = case_when(
      characteristic %in% c("BD1", "BD2") ~ "BD",
      characteristic %in% c("HD1", "HD2") ~ "HD",
      TRUE ~ characteristic  # Leave other characteristics unchanged
    )
  )

Wide format

df_wide <- df_long2 %>%
  pivot_wider(
    names_from = method,
    values_from = value
  )


df_wide2 <- df_long3 %>%
  pivot_wider(
    names_from = method,
    values_from = value
  )

ANOVA

Results: no significant difference between hand or digital measurements when using BD2/HD2; suggestive difference in method when using BD1/HD1 (0.0584).

No effect of species (did’t expect there to be, but just in case).

# NOTE: analysis requires long format

#### for BD1 and HD1
anova_results <- aov(value~method * SPP, data = df_long2)
summary(anova_results)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## method         1      2  2.0122   3.584 0.0584 .
## SPP            2      2  0.9491   1.691 0.1845  
## method:SPP     2      1  0.3224   0.574 0.5631  
## Residuals   6774   3803  0.5614                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_results2 <- aov(value~method + SPP, data = df_long2)
summary(anova_results2)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## method         1      2  2.0122   3.585 0.0584 .
## SPP            2      2  0.9491   1.691 0.1845  
## Residuals   6776   3804  0.5613                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_results3 <- aov(value~method, data = df_long2)
summary(anova_results3)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## method         1      2  2.0122   3.584 0.0584 .
## Residuals   6778   3805  0.5614                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### for BD2 and HD2 in digital, BD1 and HD1 in hand
anova_results4 <- aov(value~method * SPP, data = df_long3)
summary(anova_results4)
##               Df Sum Sq Mean Sq F value Pr(>F)
## method         1      1  0.5344   0.931  0.335
## SPP            2      2  0.8128   1.416  0.243
## method:SPP     2      1  0.5398   0.941  0.390
## Residuals   6774   3888  0.5739
anova_results5 <- aov(value~method + SPP, data = df_long3)
summary(anova_results5)
##               Df Sum Sq Mean Sq F value Pr(>F)
## method         1      1  0.5344   0.931  0.335
## SPP            2      2  0.8128   1.416  0.243
## Residuals   6776   3889  0.5739
anova_results6 <- aov(value~method, data = df_long3)
summary(anova_results6)
##               Df Sum Sq Mean Sq F value Pr(>F)
## method         1      1  0.5344   0.931  0.335
## Residuals   6778   3890  0.5740

Bland-Altman Plot

Used to assess agreement between two measurement methods.

Plots

BD/HD1

This analysis will only use BD1 and HD1 for both digital and manual measurements.

# requires wide data frame

library(ggplot2)

#compute averages and differences for Bland-Altman

df_ba <- df_wide %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )


#### Plot for whole dataset

ba_full <- ggplot(df_ba, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full

The Bland-Altman plot for the whole dataset shows disticnt clusters. This is most likely caused from the differences in range for the different characteristic types. We will look at species and characteristics to see which influences the clustering.

##### Species

ba_full_SPP <- ggplot(df_ba, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_SPP

##### Characteristics

ba_full_CR <- ggplot(df_ba, aes(x = average, y = difference, color=characteristic)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_CR

We indeed see that characteristics drive the clustering. We also see that the data for Pre-dorsal length (PreDL) hover at and below the lower limit of agreement. This would suggest either a measurement bias in one/both of the measurement methods, or a biological difference, where certain species, sizes, or specific characteristics might be harder to measure accurately with one method compared to the other.

We’ll take a look at Bland-Altman plots for each characteristic now.

ba_by_char <- ggplot(df_ba, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  facet_wrap(~ characteristic, scales = "free") +
  theme_minimal()

ba_by_char

### Check if there are species differences

ba_CR_SPP <- ggplot(df_ba, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  facet_wrap(~ characteristic, scales = "free") +
  theme_minimal()

ba_CR_SPP

As before, we see that PreDL is at or below the lower limit of agreement. When colored by species, we don’t see any distinct clustering causing this positioning. Likewise, there are no distinct species clustering in any of the characteristics, with only a handful of outliers in each characteristic. This may be due to some of the individual fish being MUCH larger than the others.

BD/HD2

This analysis will use BD2 and HD2 for digital, but BD1 and HD1 for manual measurements.

# requires wide data frame

#compute averages and differences for Bland-Altman

df_ba2 <- df_wide2 %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )

#### Plot for whole dataset

ba_full2 <- ggplot(df_ba2, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full2

The Bland-Altman plot for the whole dataset shows disticnt clusters. This is most likely caused from the differences in range for the different characteristic types. We will look at species and characteristics to see which influences the clustering.

##### Species

ba_full_SPP2 <- ggplot(df_ba2, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_SPP2

##### Characteristics

ba_full_CR2 <- ggplot(df_ba2, aes(x = average, y = difference, color=characteristic)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_CR2

We indeed see that characteristics drive the clustering. We also see that the data for Pre-dorsal length (PreDL) hover at and below the lower limit of agreement. This would suggest either a measurement bias in one/both of the measurement methods, or a biological difference, where certain species, sizes, or specific characteristics might be harder to measure accurately with one method compared to the other.

We’ll take a look at Bland-Altman plots for each characteristic now.

ba_by_char2 <- ggplot(df_ba2, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  facet_wrap(~ characteristic, scales = "free") +
  theme_minimal()

ba_by_char2

### Check if there are species differences

ba_CR_SPP2 <- ggplot(df_ba2, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  facet_wrap(~ characteristic, scales = "free") +
  theme_minimal()

ba_CR_SPP2

We’re really only interested in how BD1, HD1, BD2, and HD2 compare, so let’s isolate those.

data_BD1 <- df_wide %>%
  filter(characteristic == "BD1")
data_BD1 <- data_BD1 %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )

data_HD1 <- df_wide %>%
  filter(characteristic == "HD1")
data_HD1 <- data_HD1 %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )


data_BD2 <- df_wide2 %>%
  filter(characteristic == "BD")
data_BD2 <- data_BD2 %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )


data_HD2 <- df_wide2 %>%
  filter(characteristic == "HD")
data_HD2 <- data_HD2 %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )


ba_BD1 <- ggplot(data_BD1, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(data_BD1$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(data_BD1$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(data_BD1$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "BD1",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()


ba_HD1 <- ggplot(data_HD1, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(data_HD1$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(data_HD1$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(data_HD1$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "HD1",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_BD2 <- ggplot(data_BD2, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(data_BD2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(data_BD2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(data_BD2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "BD2",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()


ba_HD2 <- ggplot(data_HD2, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(data_HD2$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(data_HD2$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(data_HD2$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "HD2",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(ba_BD1, ba_BD2, ba_HD1, ba_HD2)

We see that BD1 has less scatter, less outside LOA and a smaller bias compared to BD2 (0.12 vs 0.36). In addition, BD2’s LOA don’t include 0, so BD1 is more appropriate to use. In contrast, HD1, while it has less scatter with fewer points outside the LOA, it has a bias further away from 0 compared to HD2 (which has slightly more scatter and more points outside LOA), so HD2 is more appropriate to use (0.06 vs -0.008).

I will therefore create a final dataset with BD1 and HD2, and re-run the ANOVAs and overall Bland-Altman plots.

Re-run

Rep-measures MANOVA

Shows a significant difference between methods (p<0.0001) BUT when we calculate effect sizes, the effect size is SUPER small (0.00132). This suggests that while there is a detectable difference between the means of the two methods, this differences is negligible.

library(car)
raw3 <- raw2 %>%
  rename(HD.dig = HD2.dig, HD.man = HD1.man)

char <- cbind(raw3$SL.dig, raw3$BD1.dig, raw3$CPD.dig, raw3$CPL.dig, raw3$PreDL.dig, raw3$DbL.dig, raw3$HL.dig, raw3$HD.dig, raw3$SnL.dig, raw3$OL.dig, raw3$SL.man, raw3$BD1.man, raw3$CPD.man, raw3$CPL.man, raw3$PreDL.man, raw3$DbL.man, raw3$HL.man, raw3$HD.man, raw3$SnL.man, raw3$OL.man)

method <- factor(c(rep("dig", 10), rep("man", 10)))
characteristics <- factor(c(
  "SL", "BD1", "CPD", "CPL", "PreDL", "DbL", "HL", "HD", "SnL", "OL"
))

manova_model <- Anova(
  lm(char ~ 1),  # No predictors, as we're only modeling within-subject effects
  idata = data.frame(method = method, characteristics = characteristics),
  idesign = ~ method * characteristics,
  type = "III"
)

# View results
summary(manova_model, multivariate = TRUE)
## 
## Type III Repeated Measures MANOVA Tests:
## 
## ------------------------------------------
##  
## Term: (Intercept) 
## 
##  Response transformation matrix:
##       (Intercept)
##  [1,]           1
##  [2,]           1
##  [3,]           1
##  [4,]           1
##  [5,]           1
##  [6,]           1
##  [7,]           1
##  [8,]           1
##  [9,]           1
## [10,]           1
## [11,]           1
## [12,]           1
## [13,]           1
## [14,]           1
## [15,]           1
## [16,]           1
## [17,]           1
## [18,]           1
## [19,]           1
## [20,]           1
## 
## Sum of squares and products for the hypothesis:
##             (Intercept)
## (Intercept)    603957.1
## 
## Multivariate Tests: (Intercept)
##                  Df test stat approx F num Df den Df     Pr(>F)    
## Pillai            1   0.99254 44991.22      1    338 < 2.22e-16 ***
## Wilks             1   0.00746 44991.22      1    338 < 2.22e-16 ***
## Hotelling-Lawley  1 133.11011 44991.22      1    338 < 2.22e-16 ***
## Roy               1 133.11011 44991.22      1    338 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: method 
## 
##  Response transformation matrix:
##       method1
##  [1,]       1
##  [2,]       1
##  [3,]       1
##  [4,]       1
##  [5,]       1
##  [6,]       1
##  [7,]       1
##  [8,]       1
##  [9,]       1
## [10,]       1
## [11,]      -1
## [12,]      -1
## [13,]      -1
## [14,]      -1
## [15,]      -1
## [16,]      -1
## [17,]      -1
## [18,]      -1
## [19,]      -1
## [20,]      -1
## 
## Sum of squares and products for the hypothesis:
##          method1
## method1 57.70577
## 
## Multivariate Tests: method
##                  Df test stat approx F num Df den Df     Pr(>F)    
## Pillai            1 0.4467077 272.8886      1    338 < 2.22e-16 ***
## Wilks             1 0.5532923 272.8886      1    338 < 2.22e-16 ***
## Hotelling-Lawley  1 0.8073628 272.8886      1    338 < 2.22e-16 ***
## Roy               1 0.8073628 272.8886      1    338 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: characteristics 
## 
##  Response transformation matrix:
##       characteristics1 characteristics2 characteristics3 characteristics4
##  [1,]                0                0                0                0
##  [2,]                1                0                0                0
##  [3,]                0                1                0                0
##  [4,]                0                0                1                0
##  [5,]                0                0                0                0
##  [6,]                0                0                0                1
##  [7,]                0                0                0                0
##  [8,]                0                0                0                0
##  [9,]               -1               -1               -1               -1
## [10,]                0                0                0                0
## [11,]                0                0                0                0
## [12,]                1                0                0                0
## [13,]                0                1                0                0
## [14,]                0                0                1                0
## [15,]                0                0                0                0
## [16,]                0                0                0                1
## [17,]                0                0                0                0
## [18,]                0                0                0                0
## [19,]               -1               -1               -1               -1
## [20,]                0                0                0                0
##       characteristics5 characteristics6 characteristics7 characteristics8
##  [1,]                0                0                0                0
##  [2,]                0                0                0                0
##  [3,]                0                0                0                0
##  [4,]                0                0                0                0
##  [5,]                0                0                0                1
##  [6,]                0                0                0                0
##  [7,]                0                1                0                0
##  [8,]                1                0                0                0
##  [9,]               -1               -1               -1               -1
## [10,]                0                0                1                0
## [11,]                0                0                0                0
## [12,]                0                0                0                0
## [13,]                0                0                0                0
## [14,]                0                0                0                0
## [15,]                0                0                0                1
## [16,]                0                0                0                0
## [17,]                0                1                0                0
## [18,]                1                0                0                0
## [19,]               -1               -1               -1               -1
## [20,]                0                0                1                0
##       characteristics9
##  [1,]                1
##  [2,]                0
##  [3,]                0
##  [4,]                0
##  [5,]                0
##  [6,]                0
##  [7,]                0
##  [8,]                0
##  [9,]               -1
## [10,]                0
## [11,]                1
## [12,]                0
## [13,]                0
## [14,]                0
## [15,]                0
## [16,]                0
## [17,]                0
## [18,]                0
## [19,]               -1
## [20,]                0
## 
## Sum of squares and products for the hypothesis:
##                  characteristics1 characteristics2 characteristics3
## characteristics1        3009.7137         1774.831        2874.5166
## characteristics2        1774.8312         1046.620        1695.1053
## characteristics3        2874.5166         1695.105        2745.3926
## characteristics4        2111.6622         1245.249        2016.8058
## characteristics5        2101.6229         1239.329        2007.2174
## characteristics6        2209.9300         1303.198        2110.6594
## characteristics7         265.9196          156.813         253.9744
## characteristics8        3539.4828         2087.237        3380.4883
## characteristics9        5237.3119         3088.448        5002.0504
##                  characteristics4 characteristics5 characteristics6
## characteristics1        2111.6622        2101.6229        2209.9300
## characteristics2        1245.2493        1239.3291        1303.1979
## characteristics3        2016.8058        2007.2174        2110.6594
## characteristics4        1481.5752        1474.5315        1550.5214
## characteristics5        1474.5315        1467.5212        1543.1499
## characteristics6        1550.5214        1543.1499        1622.6761
## characteristics7         186.5733         185.6863         195.2557
## characteristics8        2483.3565        2471.5500        2598.9213
## characteristics9        3674.5799        3657.1101        3845.5792
##                  characteristics7 characteristics8 characteristics9
## characteristics1         265.9196        3539.4828        5237.3119
## characteristics2         156.8130        2087.2366        3088.4481
## characteristics3         253.9744        3380.4883        5002.0504
## characteristics4         186.5733        2483.3565        3674.5799
## characteristics5         185.6863        2471.5500        3657.1101
## characteristics6         195.2557        2598.9213        3845.5792
## characteristics7          23.4950         312.7267         462.7363
## characteristics8         312.7267        4162.5017        6159.1822
## characteristics9         462.7363        6159.1822        9113.6360
## 
## Multivariate Tests: characteristics
##                  Df test stat approx F num Df den Df     Pr(>F)    
## Pillai            1     1.000 180274.6      9    330 < 2.22e-16 ***
## Wilks             1     0.000 180274.6      9    330 < 2.22e-16 ***
## Hotelling-Lawley  1  4916.579 180274.6      9    330 < 2.22e-16 ***
## Roy               1  4916.579 180274.6      9    330 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: method:characteristics 
## 
##  Response transformation matrix:
##       method1:characteristics1 method1:characteristics2
##  [1,]                        0                        0
##  [2,]                        1                        0
##  [3,]                        0                        1
##  [4,]                        0                        0
##  [5,]                        0                        0
##  [6,]                        0                        0
##  [7,]                        0                        0
##  [8,]                        0                        0
##  [9,]                       -1                       -1
## [10,]                        0                        0
## [11,]                        0                        0
## [12,]                       -1                        0
## [13,]                        0                       -1
## [14,]                        0                        0
## [15,]                        0                        0
## [16,]                        0                        0
## [17,]                        0                        0
## [18,]                        0                        0
## [19,]                        1                        1
## [20,]                        0                        0
##       method1:characteristics3 method1:characteristics4
##  [1,]                        0                        0
##  [2,]                        0                        0
##  [3,]                        0                        0
##  [4,]                        1                        0
##  [5,]                        0                        0
##  [6,]                        0                        1
##  [7,]                        0                        0
##  [8,]                        0                        0
##  [9,]                       -1                       -1
## [10,]                        0                        0
## [11,]                        0                        0
## [12,]                        0                        0
## [13,]                        0                        0
## [14,]                       -1                        0
## [15,]                        0                        0
## [16,]                        0                       -1
## [17,]                        0                        0
## [18,]                        0                        0
## [19,]                        1                        1
## [20,]                        0                        0
##       method1:characteristics5 method1:characteristics6
##  [1,]                        0                        0
##  [2,]                        0                        0
##  [3,]                        0                        0
##  [4,]                        0                        0
##  [5,]                        0                        0
##  [6,]                        0                        0
##  [7,]                        0                        1
##  [8,]                        1                        0
##  [9,]                       -1                       -1
## [10,]                        0                        0
## [11,]                        0                        0
## [12,]                        0                        0
## [13,]                        0                        0
## [14,]                        0                        0
## [15,]                        0                        0
## [16,]                        0                        0
## [17,]                        0                       -1
## [18,]                       -1                        0
## [19,]                        1                        1
## [20,]                        0                        0
##       method1:characteristics7 method1:characteristics8
##  [1,]                        0                        0
##  [2,]                        0                        0
##  [3,]                        0                        0
##  [4,]                        0                        0
##  [5,]                        0                        1
##  [6,]                        0                        0
##  [7,]                        0                        0
##  [8,]                        0                        0
##  [9,]                       -1                       -1
## [10,]                        1                        0
## [11,]                        0                        0
## [12,]                        0                        0
## [13,]                        0                        0
## [14,]                        0                        0
## [15,]                        0                       -1
## [16,]                        0                        0
## [17,]                        0                        0
## [18,]                        0                        0
## [19,]                        1                        1
## [20,]                       -1                        0
##       method1:characteristics9
##  [1,]                        1
##  [2,]                        0
##  [3,]                        0
##  [4,]                        0
##  [5,]                        0
##  [6,]                        0
##  [7,]                        0
##  [8,]                        0
##  [9,]                       -1
## [10,]                        0
## [11,]                       -1
## [12,]                        0
## [13,]                        0
## [14,]                        0
## [15,]                        0
## [16,]                        0
## [17,]                        0
## [18,]                        0
## [19,]                        1
## [20,]                        0
## 
## Sum of squares and products for the hypothesis:
##                          method1:characteristics1 method1:characteristics2
## method1:characteristics1                32.697209                 30.67555
## method1:characteristics2                30.675546                 28.77888
## method1:characteristics3                34.815625                 32.66298
## method1:characteristics4                18.166843                 17.04359
## method1:characteristics5                19.154595                 17.97027
## method1:characteristics6                 4.885489                  4.58342
## method1:characteristics7                19.017180                 17.84135
## method1:characteristics8               -28.692680                -26.91862
## method1:characteristics9                25.653473                 24.06732
##                          method1:characteristics3 method1:characteristics4
## method1:characteristics1                34.815625                18.166843
## method1:characteristics2                32.662981                17.043590
## method1:characteristics3                37.071291                19.343852
## method1:characteristics4                19.343852                10.093650
## method1:characteristics5                20.395600                10.642453
## method1:characteristics6                 5.202014                 2.714418
## method1:characteristics7                20.249282                10.566104
## method1:characteristics8               -30.551647               -15.941893
## method1:characteristics9                27.315534                14.253284
##                          method1:characteristics5 method1:characteristics6
## method1:characteristics1                19.154595                4.8854885
## method1:characteristics2                17.970270                4.5834196
## method1:characteristics3                20.395600                5.2020139
## method1:characteristics4                10.642453                2.7144183
## method1:characteristics5                11.221096                2.8620044
## method1:characteristics6                 2.862004                0.7299705
## method1:characteristics7                11.140595                2.8414723
## method1:characteristics8               -16.808672               -4.2871475
## method1:characteristics9                15.028252                3.8330412
##                          method1:characteristics7 method1:characteristics8
## method1:characteristics1                19.017180               -28.692680
## method1:characteristics2                17.841351               -26.918617
## method1:characteristics3                20.249282               -30.551647
## method1:characteristics4                10.566104               -15.941893
## method1:characteristics5                11.140595               -16.808672
## method1:characteristics6                 2.841472                -4.287148
## method1:characteristics7                11.060673               -16.688087
## method1:characteristics8               -16.688087                25.178598
## method1:characteristics9                14.920439               -22.511613
##                          method1:characteristics9
## method1:characteristics1                25.653473
## method1:characteristics2                24.067324
## method1:characteristics3                27.315534
## method1:characteristics4                14.253284
## method1:characteristics5                15.028252
## method1:characteristics6                 3.833041
## method1:characteristics7                14.920439
## method1:characteristics8               -22.511613
## method1:characteristics9                20.127122
## 
## Multivariate Tests: method:characteristics
##                  Df test stat approx F num Df den Df     Pr(>F)    
## Pillai            1   0.98321 2147.159      9    330 < 2.22e-16 ***
## Wilks             1   0.01679 2147.159      9    330 < 2.22e-16 ***
## Hotelling-Lawley  1  58.55888 2147.159      9    330 < 2.22e-16 ***
## Roy               1  58.55888 2147.159      9    330 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                         Sum Sq num Df Error SS den Df  F value    Pr(>F)    
## (Intercept)            30197.9      1  226.864    338 44991.22 < 2.2e-16 ***
## method                     2.9      1    3.574    338   272.89 < 2.2e-16 ***
## characteristics         3452.6      9   59.466   3042 19624.15 < 2.2e-16 ***
## method:characteristics    51.1      9   16.041   3042  1076.43 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                        Test statistic     p-value
## characteristics             0.0004014  0.0000e+00
## method:characteristics      0.0083715 5.7648e-307
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                         GG eps Pr(>F[GG])    
## characteristics        0.31317  < 2.2e-16 ***
## method:characteristics 0.48496  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           HF eps Pr(>F[HF])
## characteristics        0.3160713          0
## method:characteristics 0.4920936          0
pillai_method <- 0.4467077
num_df_method <- 1
den_df_method <- 338

# Calculate partial eta-squared for Method
eta_squared_method <- pillai_method / (pillai_method + (den_df_method / num_df_method))

# Print the result
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
## Partial Eta-Squared (Method): 0.001319876

Repeated measures ANOVA

Shows a significant difference between methods (p<0.0001) with a large effect size (0.447) BUT this is on all the data. Not sure if this makes sense to do. Would be better to run repeated measures ANOVA on one trait at a time…

anova_fin <- aov(value~method + Error(ID/method), data=df_long_fin)
summary(anova_fin)
## 
## Error: ID
##            Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 338  226.9  0.6712               
## 
## Error: ID:method
##            Df Sum Sq Mean Sq F value Pr(>F)    
## method      1  2.885  2.8853   272.9 <2e-16 ***
## Residuals 338  3.574  0.0106                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: Within
##             Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 6102   3579  0.5866
SS_method <- 2.885  # Sum of squares for method
SS_error <- 3.574   # Sum of squares for residuals (error) from ID:method

# Calculate Partial Eta-Squared for Method
eta_squared_method <- SS_method / (SS_method + SS_error)

# Print the result
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
## Partial Eta-Squared (Method): 0.4466636

Bland-Alt plots

df_ba_fin <- df_wide_fin %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )

ba_full_fin <- ggplot(df_ba_fin, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_fin

##### Species

ba_full_SPP_fin <- ggplot(df_ba_fin, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_SPP_fin

##### Characteristics

ba_full_CR_fin <- ggplot(df_ba_fin, aes(x = average, y = difference, color=characteristic)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_full_CR_fin

##### individual characteristics

ba_by_char_fin <- ggplot(df_ba_fin, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba_fin$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  facet_wrap(~ characteristic, scales = "free") +
  theme_minimal()

ba_by_char_fin

Stats

All together

NOTE: this looping below isn’t working because the structure of ‘stats’ does not contain the p-value (even though printing stats does show a p-value). Contacted package author for more assistance. unique_characteristics <- unique(df_long$characteristic)

Function to apply blandr.statistics for each characteristic and extract the relevant stats

statistics_list <- lapply(unique_characteristics, function(char) {

# Filter data for the current characteristic (two rows, one for each method) df_char <- df_long %>% filter(characteristic == char)

# Apply blandr.statistics function to the Digital and Hand values stats <- blandr.statistics( x = df_char\(value[df_char\)method == “dig”], y = df_char\(value[df_char\)method == “man”] )

# Extract desired statistics from the results p_value <- stats\(p_value mean_diff <- stats\)bias lower_limit <- stats\(lowerLOA upper_limit <- stats\)upperLOA

# Return a data frame with the statistics for this characteristic return(data.frame( Characteristic = char, p_value = p_value, mean_difference = mean_diff, mean = mean, lower_limit = lower_limit, upper_limit = upper_limit )) })

Individual

Literally all (but like HD2 and OL) are significant. HOWEVER, the bias is INCREDIBLY close to zero (suggesting no difference between the methods) and the LOAs include 0 (which also suggest that no difference between the methods is possible).

  • BD2 LOA does not contain 0 (0.1-0.6)
  • PreDL LOA does not contain 0 (-0.7 to -0.2)
library(blandr)
## Warning: package 'blandr' was built under R version 4.4.2
### Whole dataset

(bar_full <- blandr.statistics(df_wide$dig, df_wide$man))
## Bland-Altman Statistics
## =======================
## t = -9.7766, df = 3389, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  3390 
## Maximum value for average measures:  4.042436 
## Minimum value for average measures:  0.3834636 
## Maximum value for difference in measures:  0.5562302 
## Minimum value for difference in measures:  -0.9342704 
## 
## Bias:  -0.03445528 
## Standard deviation of bias:  0.2051953 
## 
## Standard error of bias:  0.003524258 
## Standard error for limits of agreement:  0.00602359 
## 
## Bias:  -0.03445528 
## Bias- upper 95% CI:  -0.02754539 
## Bias- lower 95% CI:  -0.04136516 
## 
## Upper limit of agreement:  0.3677276 
## Upper LOA- upper 95% CI:  0.3795378 
## Upper LOA- lower 95% CI:  0.3559173 
## 
## Lower limit of agreement:  -0.4366381 
## Lower LOA- upper 95% CI:  -0.4248279 
## Lower LOA- lower 95% CI:  -0.4484484 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -2.604907 
## Point estimate of bias as proportion of lowest average:  -8.985281 
## Point estimate of bias as proportion of highest average -0.8523395 
## Spread of data between lower and upper LoAs:  0.8043657 
## Bias as proportion of LoA spread:  -4.283534 
## 
## =======================
## Bias: 
##  -0.03445528  ( -0.04136516  to  -0.02754539 ) 
## ULoA: 
##  0.3677276  ( 0.3559173  to  0.3795378 ) 
## LLoA: 
##  -0.4366381  ( -0.4484484  to  -0.4248279 )
(bar_full2 <- blandr.statistics(df_wide2$dig, df_wide2$man))
## Bland-Altman Statistics
## =======================
## t = -4.3957, df = 3389, p-value = 1.138e-05
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  3390 
## Maximum value for average measures:  4.042436 
## Minimum value for average measures:  0.3834636 
## Maximum value for difference in measures:  0.746832 
## Minimum value for difference in measures:  -0.9342704 
## 
## Bias:  -0.01775692 
## Standard deviation of bias:  0.2352018 
## 
## Standard error of bias:  0.004039623 
## Standard error for limits of agreement:  0.006904443 
## 
## Bias:  -0.01775692 
## Bias- upper 95% CI:  -0.009836577 
## Bias- lower 95% CI:  -0.02567727 
## 
## Upper limit of agreement:  0.4432387 
## Upper LOA- upper 95% CI:  0.456776 
## Upper LOA- lower 95% CI:  0.4297014 
## 
## Lower limit of agreement:  -0.4787525 
## Lower LOA- upper 95% CI:  -0.4652152 
## Lower LOA- lower 95% CI:  -0.4922898 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -2.042943 
## Point estimate of bias as proportion of lowest average:  -4.630667 
## Point estimate of bias as proportion of highest average -0.4392629 
## Spread of data between lower and upper LoAs:  0.9219912 
## Bias as proportion of LoA spread:  -1.925932 
## 
## =======================
## Bias: 
##  -0.01775692  ( -0.02567727  to  -0.009836577 ) 
## ULoA: 
##  0.4432387  ( 0.4297014  to  0.456776 ) 
## LLoA: 
##  -0.4787525  ( -0.4922898  to  -0.4652152 )
### By characteristic

df_SL <- df_wide %>%
  filter(characteristic == "SL")
(bar_SL <- blandr.statistics(df_SL$dig, df_SL$man))
## Bland-Altman Statistics
## =======================
## t = 23.922, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  4.042436 
## Minimum value for average measures:  2.960844 
## Maximum value for difference in measures:  0.1785351 
## Minimum value for difference in measures:  -0.2129669 
## 
## Bias:  0.05387797 
## Standard deviation of bias:  0.04146884 
## 
## Standard error of bias:  0.002252278 
## Standard error for limits of agreement:  0.003852918 
## 
## Bias:  0.05387797 
## Bias- upper 95% CI:  0.05830822 
## Bias- lower 95% CI:  0.04944772 
## 
## Upper limit of agreement:  0.1351569 
## Upper LOA- upper 95% CI:  0.1427356 
## Upper LOA- lower 95% CI:  0.1275782 
## 
## Lower limit of agreement:  -0.02740096 
## Lower LOA- upper 95% CI:  -0.01982224 
## Lower LOA- lower 95% CI:  -0.03497967 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  1.517899 
## Point estimate of bias as proportion of lowest average:  1.819683 
## Point estimate of bias as proportion of highest average 1.332809 
## Spread of data between lower and upper LoAs:  0.1625579 
## Bias as proportion of LoA spread:  33.14387 
## 
## =======================
## Bias: 
##  0.05387797  ( 0.04944772  to  0.05830822 ) 
## ULoA: 
##  0.1351569  ( 0.1275782  to  0.1427356 ) 
## LLoA: 
##  -0.02740096  ( -0.03497967  to  -0.01982224 )
df_BD1 <- df_wide %>%
  filter(characteristic == "BD1")
(bar_BD1 <- blandr.statistics(df_BD1$dig, df_BD1$man))
## Bland-Altman Statistics
## =======================
## t = 30.615, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  3.121966 
## Minimum value for average measures:  1.748914 
## Maximum value for difference in measures:  0.4951842 
## Minimum value for difference in measures:  -0.2418473 
## 
## Bias:  0.1207813 
## Standard deviation of bias:  0.07263929 
## 
## Standard error of bias:  0.003945225 
## Standard error for limits of agreement:  0.006749001 
## 
## Bias:  0.1207813 
## Bias- upper 95% CI:  0.1285416 
## Bias- lower 95% CI:  0.113021 
## 
## Upper limit of agreement:  0.2631543 
## Upper LOA- upper 95% CI:  0.2764297 
## Upper LOA- lower 95% CI:  0.249879 
## 
## Lower limit of agreement:  -0.02159169 
## Lower LOA- upper 95% CI:  -0.00831636 
## Lower LOA- lower 95% CI:  -0.03486703 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  4.995667 
## Point estimate of bias as proportion of lowest average:  6.906076 
## Point estimate of bias as proportion of highest average 3.868758 
## Spread of data between lower and upper LoAs:  0.284746 
## Bias as proportion of LoA spread:  42.41721 
## 
## =======================
## Bias: 
##  0.1207813  ( 0.113021  to  0.1285416 ) 
## ULoA: 
##  0.2631543  ( 0.249879  to  0.2764297 ) 
## LLoA: 
##  -0.02159169  ( -0.03486703  to  -0.00831636 )
df_BD2 <- df_wide2 %>%
  filter(characteristic == "BD")
(bar_BD2 <- blandr.statistics(df_BD2$dig, df_BD2$man))
## Bland-Altman Statistics
## =======================
## t = 52.87, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  3.163316 
## Minimum value for average measures:  1.876811 
## Maximum value for difference in measures:  0.746832 
## Minimum value for difference in measures:  -0.09538079 
## 
## Bias:  0.3557936 
## Standard deviation of bias:  0.1239048 
## 
## Standard error of bias:  0.006729585 
## Standard error for limits of agreement:  0.01151214 
## 
## Bias:  0.3557936 
## Bias- upper 95% CI:  0.3690308 
## Bias- lower 95% CI:  0.3425565 
## 
## Upper limit of agreement:  0.598647 
## Upper LOA- upper 95% CI:  0.6212915 
## Upper LOA- lower 95% CI:  0.5760026 
## 
## Lower limit of agreement:  0.1129402 
## Lower LOA- upper 95% CI:  0.1355847 
## Lower LOA- lower 95% CI:  0.09029573 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  14.04186 
## Point estimate of bias as proportion of lowest average:  18.95735 
## Point estimate of bias as proportion of highest average 11.24749 
## Spread of data between lower and upper LoAs:  0.4857068 
## Bias as proportion of LoA spread:  73.25275 
## 
## =======================
## Bias: 
##  0.3557936  ( 0.3425565  to  0.3690308 ) 
## ULoA: 
##  0.598647  ( 0.5760026  to  0.6212915 ) 
## LLoA: 
##  0.1129402  ( 0.09029573  to  0.1355847 )
df_CPD <- df_wide %>%
  filter(characteristic == "CPD")
(bar_CPD <- blandr.statistics(df_CPD$dig, df_CPD$man))
## Bland-Altman Statistics
## =======================
## t = 27.399, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.471218 
## Minimum value for average measures:  1.170403 
## Maximum value for difference in measures:  0.5562302 
## Minimum value for difference in measures:  -0.1550393 
## 
## Bias:  0.101579 
## Standard deviation of bias:  0.06825963 
## 
## Standard error of bias:  0.003707354 
## Standard error for limits of agreement:  0.006342082 
## 
## Bias:  0.101579 
## Bias- upper 95% CI:  0.1088714 
## Bias- lower 95% CI:  0.09428661 
## 
## Upper limit of agreement:  0.2353679 
## Upper LOA- upper 95% CI:  0.2478428 
## Upper LOA- lower 95% CI:  0.222893 
## 
## Lower limit of agreement:  -0.03220987 
## Lower LOA- upper 95% CI:  -0.01973495 
## Lower LOA- lower 95% CI:  -0.04468479 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  5.715623 
## Point estimate of bias as proportion of lowest average:  8.678974 
## Point estimate of bias as proportion of highest average 4.110483 
## Spread of data between lower and upper LoAs:  0.2675778 
## Bias as proportion of LoA spread:  37.96243 
## 
## =======================
## Bias: 
##  0.101579  ( 0.09428661  to  0.1088714 ) 
## ULoA: 
##  0.2353679  ( 0.222893  to  0.2478428 ) 
## LLoA: 
##  -0.03220987  ( -0.04468479  to  -0.01973495 )
df_CPL <- df_wide %>%
  filter(characteristic == "CPL")
(bar_CPL <- blandr.statistics(df_CPL$dig, df_CPL$man))
## Bland-Altman Statistics
## =======================
## t = 30.741, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.890711 
## Minimum value for average measures:  1.76216 
## Maximum value for difference in measures:  0.3986366 
## Minimum value for difference in measures:  -0.19231 
## 
## Bias:  0.1409026 
## Standard deviation of bias:  0.08439069 
## 
## Standard error of bias:  0.004583473 
## Standard error for limits of agreement:  0.007840837 
## 
## Bias:  0.1409026 
## Bias- upper 95% CI:  0.1499183 
## Bias- lower 95% CI:  0.1318869 
## 
## Upper limit of agreement:  0.3063084 
## Upper LOA- upper 95% CI:  0.3217314 
## Upper LOA- lower 95% CI:  0.2908854 
## 
## Lower limit of agreement:  -0.02450314 
## Lower LOA- upper 95% CI:  -0.00908016 
## Lower LOA- lower 95% CI:  -0.03992613 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  5.963766 
## Point estimate of bias as proportion of lowest average:  7.996017 
## Point estimate of bias as proportion of highest average 4.874324 
## Spread of data between lower and upper LoAs:  0.3308115 
## Bias as proportion of LoA spread:  42.59302 
## 
## =======================
## Bias: 
##  0.1409026  ( 0.1318869  to  0.1499183 ) 
## ULoA: 
##  0.3063084  ( 0.2908854  to  0.3217314 ) 
## LLoA: 
##  -0.02450314  ( -0.03992613  to  -0.00908016 )
df_PreDL <- df_wide %>%
  filter(characteristic == "PreDL")
(bar_PreDL <- blandr.statistics(df_PreDL$dig, df_PreDL$man))
## Bland-Altman Statistics
## =======================
## t = -75.204, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  3.204808 
## Minimum value for average measures:  2.05628 
## Maximum value for difference in measures:  0.2376452 
## Minimum value for difference in measures:  -0.9116021 
## 
## Bias:  -0.4623168 
## Standard deviation of bias:  0.113188 
## 
## Standard error of bias:  0.006147531 
## Standard error for limits of agreement:  0.01051643 
## 
## Bias:  -0.4623168 
## Bias- upper 95% CI:  -0.4502246 
## Bias- lower 95% CI:  -0.4744091 
## 
## Upper limit of agreement:  -0.2404683 
## Upper LOA- upper 95% CI:  -0.2197824 
## Upper LOA- lower 95% CI:  -0.2611542 
## 
## Lower limit of agreement:  -0.6841654 
## Lower LOA- upper 95% CI:  -0.6634795 
## Lower LOA- lower 95% CI:  -0.7048513 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -17.19129 
## Point estimate of bias as proportion of lowest average:  -22.48317 
## Point estimate of bias as proportion of highest average -14.42572 
## Spread of data between lower and upper LoAs:  0.4436971 
## Bias as proportion of LoA spread:  -104.1965 
## 
## =======================
## Bias: 
##  -0.4623168  ( -0.4744091  to  -0.4502246 ) 
## ULoA: 
##  -0.2404683  ( -0.2611542  to  -0.2197824 ) 
## LLoA: 
##  -0.6841654  ( -0.7048513  to  -0.6634795 )
df_DbL <- df_wide %>%
  filter(characteristic == "DbL")
(bar_DbL <- blandr.statistics(df_DbL$dig, df_DbL$man))
## Bland-Altman Statistics
## =======================
## t = -2.8434, df = 338, p-value = 0.004736
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.786619 
## Minimum value for average measures:  1.117524 
## Maximum value for difference in measures:  0.2886718 
## Minimum value for difference in measures:  -0.9342704 
## 
## Bias:  -0.01723212 
## Standard deviation of bias:  0.1115855 
## 
## Standard error of bias:  0.006060494 
## Standard error for limits of agreement:  0.01036754 
## 
## Bias:  -0.01723212 
## Bias- upper 95% CI:  -0.005311085 
## Bias- lower 95% CI:  -0.02915316 
## 
## Upper limit of agreement:  0.2014755 
## Upper LOA- upper 95% CI:  0.2218686 
## Upper LOA- lower 95% CI:  0.1810825 
## 
## Lower limit of agreement:  -0.2359398 
## Lower LOA- upper 95% CI:  -0.2155467 
## Lower LOA- lower 95% CI:  -0.2563328 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -1.003452 
## Point estimate of bias as proportion of lowest average:  -1.541991 
## Point estimate of bias as proportion of highest average -0.6183882 
## Spread of data between lower and upper LoAs:  0.4374153 
## Bias as proportion of LoA spread:  -3.939533 
## 
## =======================
## Bias: 
##  -0.01723212  ( -0.02915316  to  -0.005311085 ) 
## ULoA: 
##  0.2014755  ( 0.1810825  to  0.2218686 ) 
## LLoA: 
##  -0.2359398  ( -0.2563328  to  -0.2155467 )
df_HL <- df_wide %>%
  filter(characteristic == "HL")
(bar_HL <- blandr.statistics(df_HL$dig, df_HL$man))
## Bland-Altman Statistics
## =======================
## t = -16.151, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.565974 
## Minimum value for average measures:  1.480514 
## Maximum value for difference in measures:  0.528404 
## Minimum value for difference in measures:  -0.7045543 
## 
## Bias:  -0.1433821 
## Standard deviation of bias:  0.163451 
## 
## Standard error of bias:  0.008877438 
## Standard error for limits of agreement:  0.01518642 
## 
## Bias:  -0.1433821 
## Bias- upper 95% CI:  -0.1259201 
## Bias- lower 95% CI:  -0.1608441 
## 
## Upper limit of agreement:  0.1769818 
## Upper LOA- upper 95% CI:  0.2068536 
## Upper LOA- lower 95% CI:  0.14711 
## 
## Lower limit of agreement:  -0.463746 
## Lower LOA- upper 95% CI:  -0.4338742 
## Lower LOA- lower 95% CI:  -0.4936178 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -7.262377 
## Point estimate of bias as proportion of lowest average:  -9.684617 
## Point estimate of bias as proportion of highest average -5.587823 
## Spread of data between lower and upper LoAs:  0.6407278 
## Bias as proportion of LoA spread:  -22.378 
## 
## =======================
## Bias: 
##  -0.1433821  ( -0.1608441  to  -0.1259201 ) 
## ULoA: 
##  0.1769818  ( 0.14711  to  0.2068536 ) 
## LLoA: 
##  -0.463746  ( -0.4936178  to  -0.4338742 )
df_HD1 <- df_wide %>%
  filter(characteristic == "HD1")
(bar_HD1 <- blandr.statistics(df_HD1$dig, df_HD1$man))
## Bland-Altman Statistics
## =======================
## t = 14.044, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.601825 
## Minimum value for average measures:  1.451618 
## Maximum value for difference in measures:  0.4229753 
## Minimum value for difference in measures:  -0.2031489 
## 
## Bias:  0.06017856 
## Standard deviation of bias:  0.07889485 
## 
## Standard error of bias:  0.00428498 
## Standard error for limits of agreement:  0.007330212 
## 
## Bias:  0.06017856 
## Bias- upper 95% CI:  0.06860715 
## Bias- lower 95% CI:  0.05174997 
## 
## Upper limit of agreement:  0.2148125 
## Upper LOA- upper 95% CI:  0.229231 
## Upper LOA- lower 95% CI:  0.2003939 
## 
## Lower limit of agreement:  -0.09445534 
## Lower LOA- upper 95% CI:  -0.08003676 
## Lower LOA- lower 95% CI:  -0.1088739 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  3.007816 
## Point estimate of bias as proportion of lowest average:  4.14562 
## Point estimate of bias as proportion of highest average 2.312937 
## Spread of data between lower and upper LoAs:  0.3092678 
## Bias as proportion of LoA spread:  19.4584 
## 
## =======================
## Bias: 
##  0.06017856  ( 0.05174997  to  0.06860715 ) 
## ULoA: 
##  0.2148125  ( 0.2003939  to  0.229231 ) 
## LLoA: 
##  -0.09445534  ( -0.1088739  to  -0.08003676 )
df_HD2 <- df_wide2 %>%
  filter(characteristic == "HD")
(bar_HD2 <- blandr.statistics(df_HD2$dig, df_HD2$man))
## Bland-Altman Statistics
## =======================
## t = -1.3693, df = 338, p-value = 0.1718
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  2.566481 
## Minimum value for average measures:  1.391822 
## Maximum value for difference in measures:  0.4478337 
## Minimum value for difference in measures:  -0.2905665 
## 
## Bias:  -0.007850176 
## Standard deviation of bias:  0.1055546 
## 
## Standard error of bias:  0.005732938 
## Standard error for limits of agreement:  0.009807199 
## 
## Bias:  -0.007850176 
## Bias- upper 95% CI:  0.003426556 
## Bias- lower 95% CI:  -0.01912691 
## 
## Upper limit of agreement:  0.1990368 
## Upper LOA- upper 95% CI:  0.2183276 
## Upper LOA- lower 95% CI:  0.179746 
## 
## Lower limit of agreement:  -0.2147372 
## Lower LOA- upper 95% CI:  -0.1954463 
## Lower LOA- lower 95% CI:  -0.234028 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -0.4187308 
## Point estimate of bias as proportion of lowest average:  -0.5640215 
## Point estimate of bias as proportion of highest average -0.3058731 
## Spread of data between lower and upper LoAs:  0.413774 
## Bias as proportion of LoA spread:  -1.897213 
## 
## =======================
## Bias: 
##  -0.007850176  ( -0.01912691  to  0.003426556 ) 
## ULoA: 
##  0.1990368  ( 0.179746  to  0.2183276 ) 
## LLoA: 
##  -0.2147372  ( -0.234028  to  -0.1954463 )
df_SnL <- df_wide %>%
  filter(characteristic == "SnL")
(bar_SnL <- blandr.statistics(df_SnL$dig, df_SnL$man))
## Bland-Altman Statistics
## =======================
## t = -22.082, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  1.547597 
## Minimum value for average measures:  0.3834636 
## Maximum value for difference in measures:  0.2909172 
## Minimum value for difference in measures:  -0.7671152 
## 
## Bias:  -0.1897858 
## Standard deviation of bias:  0.158242 
## 
## Standard error of bias:  0.008594523 
## Standard error for limits of agreement:  0.01470244 
## 
## Bias:  -0.1897858 
## Bias- upper 95% CI:  -0.1728803 
## Bias- lower 95% CI:  -0.2066913 
## 
## Upper limit of agreement:  0.1203684 
## Upper LOA- upper 95% CI:  0.1492882 
## Upper LOA- lower 95% CI:  0.0914486 
## 
## Lower limit of agreement:  -0.49994 
## Lower LOA- upper 95% CI:  -0.4710202 
## Lower LOA- lower 95% CI:  -0.5288599 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -21.11482 
## Point estimate of bias as proportion of lowest average:  -49.49253 
## Point estimate of bias as proportion of highest average -12.26326 
## Spread of data between lower and upper LoAs:  0.6203085 
## Bias as proportion of LoA spread:  -30.59539 
## 
## =======================
## Bias: 
##  -0.1897858  ( -0.2066913  to  -0.1728803 ) 
## ULoA: 
##  0.1203684  ( 0.0914486  to  0.1492882 ) 
## LLoA: 
##  -0.49994  ( -0.5288599  to  -0.4710202 )
df_OL <- df_wide %>%
  filter(characteristic == "OL")
(bar_OL <- blandr.statistics(df_OL$dig, df_OL$man))
## Bland-Altman Statistics
## =======================
## t = -1.7645, df = 338, p-value = 0.07855
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  1.471467 
## Minimum value for average measures:  0.6198813 
## Maximum value for difference in measures:  0.4116851 
## Minimum value for difference in measures:  -0.3320279 
## 
## Bias:  -0.009155384 
## Standard deviation of bias:  0.09553187 
## 
## Standard error of bias:  0.005188579 
## Standard error for limits of agreement:  0.008875977 
## 
## Bias:  -0.009155384 
## Bias- upper 95% CI:  0.001050588 
## Bias- lower 95% CI:  -0.01936136 
## 
## Upper limit of agreement:  0.1780871 
## Upper LOA- upper 95% CI:  0.1955462 
## Upper LOA- lower 95% CI:  0.160628 
## 
## Lower limit of agreement:  -0.1963978 
## Lower LOA- upper 95% CI:  -0.1789387 
## Lower LOA- lower 95% CI:  -0.213857 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -0.6778975 
## Point estimate of bias as proportion of lowest average:  -1.476958 
## Point estimate of bias as proportion of highest average -0.6221944 
## Spread of data between lower and upper LoAs:  0.3744849 
## Bias as proportion of LoA spread:  -2.444794 
## 
## =======================
## Bias: 
##  -0.009155384  ( -0.01936136  to  0.001050588 ) 
## ULoA: 
##  0.1780871  ( 0.160628  to  0.1955462 ) 
## LLoA: 
##  -0.1963978  ( -0.213857  to  -0.1789387 )

Pre-dorsal

Closer look at the pre-dorsal measurement, since the values lie around the lower LOA, suggesting a difference in manual and digital measurements.

BA plots

We see that the bias is close to zero () and the LOAs include zero and aren’t a wide range (), BUT all the points are at or below the lower LOA. This suggests that while the two methods are close on average, there is a systematic difference where manual measurements consistently yielded larger values than the digital ones.

df_predl <- df_wide %>%
  filter(characteristic == "PreDL")

# Separate Digital and Hand values
digital_values <- df_predl$dig
hand_values <- df_predl$man

# Apply the blandr.statistics function
stats <- blandr.statistics(method1 = digital_values, method2 = hand_values)

# Extract the lower and upper limits of agreement
lower_limit <- stats$upperLOA
upper_limit <- stats$lowerLOA

# Calculate differences and filter for outliers
df_predl <- df_predl %>%
  mutate(diff = dig - man) %>%
  filter(diff < lower_limit | diff > upper_limit)

# View the outliers for PreDL
print(df_predl)
## # A tibble: 339 × 6
##    ID       SPP   characteristic   dig   man   diff
##    <fct>    <fct> <fct>          <dbl> <dbl>  <dbl>
##  1 AD19-001 p.lat PreDL           2.72  3.10 -0.384
##  2 AD19-002 p.lat PreDL           2.53  3.04 -0.506
##  3 AD19-003 p.lat PreDL           2.82  3.25 -0.433
##  4 AD19-004 p.lat PreDL           2.33  2.83 -0.500
##  5 AD19-005 p.lat PreDL           2.64  3.05 -0.406
##  6 AD19-006 p.lat PreDL           2.62  3.02 -0.405
##  7 AD19-007 p.lat PreDL           2.44  2.91 -0.466
##  8 AD19-008 p.lat PreDL           2.45  2.86 -0.412
##  9 AD19-009 p.lat PreDL           2.33  2.91 -0.574
## 10 AD19-010 p.lat PreDL           2.63  3.04 -0.414
## # ℹ 329 more rows
cat("Mean Difference:", mean(df_predl$diff), "\n")
## Mean Difference: -0.4623168
cat("Lower Limit:", lower_limit, "\n")
## Lower Limit: -0.2404683
cat("Upper Limit:", upper_limit, "\n")
## Upper Limit: -0.6841654
df_PreDL <- df_predl %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )


ba_PreDL <- ggplot(df_PreDL, aes(x = average, y = difference, color=SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") +  # Mean line
  geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") +  # Limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()

ba_PreDL

library(blandr)

blandr.statistics(df_PreDL$dig, df_PreDL$man)
## Bland-Altman Statistics
## =======================
## t = -75.204, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
## 
## =======================
## Number of comparisons:  339 
## Maximum value for average measures:  3.204808 
## Minimum value for average measures:  2.05628 
## Maximum value for difference in measures:  0.2376452 
## Minimum value for difference in measures:  -0.9116021 
## 
## Bias:  -0.4623168 
## Standard deviation of bias:  0.113188 
## 
## Standard error of bias:  0.006147531 
## Standard error for limits of agreement:  0.01051643 
## 
## Bias:  -0.4623168 
## Bias- upper 95% CI:  -0.4502246 
## Bias- lower 95% CI:  -0.4744091 
## 
## Upper limit of agreement:  -0.2404683 
## Upper LOA- upper 95% CI:  -0.2197824 
## Upper LOA- lower 95% CI:  -0.2611542 
## 
## Lower limit of agreement:  -0.6841654 
## Lower LOA- upper 95% CI:  -0.6634795 
## Lower LOA- lower 95% CI:  -0.7048513 
## 
## =======================
## Derived measures:  
## Mean of differences/means:  -17.19129 
## Point estimate of bias as proportion of lowest average:  -22.48317 
## Point estimate of bias as proportion of highest average -14.42572 
## Spread of data between lower and upper LoAs:  0.4436971 
## Bias as proportion of LoA spread:  -104.1965 
## 
## =======================
## Bias: 
##  -0.4623168  ( -0.4744091  to  -0.4502246 ) 
## ULoA: 
##  -0.2404683  ( -0.2611542  to  -0.2197824 ) 
## LLoA: 
##  -0.6841654  ( -0.7048513  to  -0.6634795 )

Rep-measures ANOVA

Significant (p<0.001) with a large effect size (0.943) meaning there is a significant difference in Pre-dorsal measurements between digital and manual. Based on the BA plot, it appears the manual measurements were higher than the digital measurements.

df_predl2 <- df_long %>%
  filter(characteristic == "PreDL")

anova_predl <- aov(value~method + Error(ID/method), data=df_predl2)
summary(anova_predl)
## 
## Error: ID
##            Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 338   28.1 0.08313               
## 
## Error: ID:method
##            Df Sum Sq Mean Sq F value Pr(>F)    
## method      1  36.23   36.23    5656 <2e-16 ***
## Residuals 338   2.17    0.01                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SS_method <- 36.23  # Sum of squares for method
SS_error <- 2.17   # Sum of squares for residuals (error) from ID:method

# Calculate Partial Eta-Squared for Method
eta_squared_method <- SS_method / (SS_method + SS_error)

# Print the result
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
## Partial Eta-Squared (Method): 0.9434896

PCA

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine which methodology influence the variation in our data the most without worrying about differences in scales/measurements.

Loadings

library(stringr)

z.scores <- raw2

z.scores[, c(3:24)] <- scale(z.scores[, 3:24], center = TRUE, scale = TRUE)

PCA <- prcomp(z.scores[, 3:24])

summary(PCA)
## Importance of components:
##                          PC1     PC2     PC3    PC4     PC5    PC6     PC7
## Standard deviation     4.182 1.21217 0.84996 0.7357 0.56329 0.5180 0.45818
## Proportion of Variance 0.795 0.06679 0.03284 0.0246 0.01442 0.0122 0.00954
## Cumulative Proportion  0.795 0.86180 0.89463 0.9192 0.93366 0.9458 0.95540
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.42037 0.41587 0.33673 0.32661 0.30076 0.28121 0.25212
## Proportion of Variance 0.00803 0.00786 0.00515 0.00485 0.00411 0.00359 0.00289
## Cumulative Proportion  0.96343 0.97129 0.97644 0.98129 0.98540 0.98900 0.99189
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.22165 0.19780 0.16764 0.16404 0.12458 0.10221 0.07463
## Proportion of Variance 0.00223 0.00178 0.00128 0.00122 0.00071 0.00047 0.00025
## Cumulative Proportion  0.99412 0.99590 0.99718 0.99840 0.99911 0.99958 0.99983
##                           PC22
## Standard deviation     0.06048
## Proportion of Variance 0.00017
## Cumulative Proportion  1.00000
(loadings <- PCA$rotation[, 1:5])
##                  PC1          PC2         PC3          PC4          PC5
## SL.dig    -0.2347778 -0.096996721  0.01693687 -0.006176149  0.062160581
## BD1.dig   -0.2284302  0.040438320  0.11828459 -0.082195318 -0.020081064
## BD2.dig   -0.2265082 -0.204230055  0.04686845 -0.114485959  0.014698263
## CPD.dig   -0.2331491  0.027815265  0.02843859  0.060726336  0.028640808
## CPL.dig   -0.2233653 -0.122526759 -0.01765293  0.051407052  0.087380963
## PreDL.dig -0.1795926 -0.455754637  0.38328152 -0.035114502  0.036990294
## DbL.dig   -0.1792735  0.492804564  0.15433728  0.093722366  0.132645722
## HL.dig    -0.1881629  0.037841635 -0.69302734 -0.129545756  0.040276382
## HD1.dig   -0.2314303  0.092691464 -0.15121241 -0.074290038 -0.049191518
## HD2.dig   -0.2202611  0.126740445 -0.37372814 -0.114658545 -0.058170881
## SnL.dig   -0.1887660 -0.209724253 -0.23206376  0.438310297  0.299520788
## OL.dig    -0.2176019 -0.051613729 -0.08223996 -0.187395317 -0.246012805
## SL.man    -0.2346467 -0.066007194  0.05419313 -0.013410667  0.043012548
## BD1.man   -0.2233880  0.137447421  0.15671013  0.048596434 -0.036769031
## CPD.man   -0.2275129  0.051199268  0.08915917  0.048228291  0.001670173
## CPL.man   -0.2229489 -0.084361388  0.02254979  0.033420529  0.088728475
## PreDL.man -0.2160456 -0.280047719  0.02145309 -0.055310926  0.063545028
## DbL.man   -0.1679576  0.536687777  0.19654399  0.059968129  0.115261547
## HL.man    -0.2113179  0.033215809  0.05468238 -0.198535844  0.394539908
## HD1.man   -0.2253717  0.081537288  0.14871885 -0.052250635  0.089215660
## SnL.man   -0.1857245 -0.004219492 -0.02498576  0.725115394 -0.468553383
## OL.man    -0.2025594  0.030649519  0.08639455 -0.347119680 -0.631563787
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

VM_PCA <- varimax(PCA$rotation[, 1:3]) 

VM_PCA
## $loadings
## 
## Loadings:
##           PC1    PC2    PC3   
## SL.dig    -0.116 -0.220       
## BD1.dig   -0.235 -0.112       
## BD2.dig          -0.302       
## CPD.dig   -0.190 -0.120       
## CPL.dig          -0.231       
## PreDL.dig        -0.493  0.376
## DbL.dig   -0.469  0.276       
## HL.dig     0.168        -0.698
## HD1.dig   -0.142        -0.248
## HD2.dig                 -0.449
## SnL.dig          -0.267 -0.231
## OL.dig           -0.167 -0.147
## SL.man    -0.151 -0.197       
## BD1.man   -0.303              
## CPD.man   -0.227 -0.101       
## CPL.man   -0.118 -0.203       
## PreDL.man        -0.354       
## DbL.man   -0.505  0.315       
## HL.man    -0.190 -0.104       
## HD1.man   -0.270              
## SnL.man   -0.115 -0.114       
## OL.man    -0.197 -0.102       
## 
##                  PC1   PC2   PC3
## SS loadings    1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
## 
## $rotmat
##            [,1]        [,2]       [,3]
## [1,]  0.6917396  0.60222957  0.3985170
## [2,] -0.5560565  0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105  0.8857027
library(factoextra)

(eig.val <- get_eigenvalue(PCA))
##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  17.490167246      79.50076021                    79.50076
## Dim.2   1.469344948       6.67884067                    86.17960
## Dim.3   0.722436480       3.28380218                    89.46340
## Dim.4   0.541231220       2.46014191                    91.92354
## Dim.5   0.317290132       1.44222787                    93.36577
## Dim.6   0.268321832       1.21964469                    94.58542
## Dim.7   0.209931770       0.95423532                    95.53965
## Dim.8   0.176710702       0.80323046                    96.34288
## Dim.9   0.172949491       0.78613405                    97.12902
## Dim.10  0.113385668       0.51538940                    97.64441
## Dim.11  0.106671751       0.48487160                    98.12928
## Dim.12  0.090458276       0.41117398                    98.54045
## Dim.13  0.079078144       0.35944611                    98.89990
## Dim.14  0.063562571       0.28892078                    99.18882
## Dim.15  0.049128830       0.22331286                    99.41213
## Dim.16  0.039123834       0.17783561                    99.58997
## Dim.17  0.028104434       0.12774743                    99.71772
## Dim.18  0.026908568       0.12231167                    99.84003
## Dim.19  0.015519246       0.07054203                    99.91057
## Dim.20  0.010446972       0.04748624                    99.95806
## Dim.21  0.005570293       0.02531951                    99.98337
## Dim.22  0.003657591       0.01662541                   100.00000
ind <- get_pca_ind(PCA)
head(ind$coord[,1:4])
##        Dim.1     Dim.2      Dim.3       Dim.4
## 1 -5.4067751 0.4298672  0.4151608  0.68096887
## 2 -5.9873007 1.2851793 -0.4499961  0.35356493
## 3 -8.8447937 0.4505076  0.3553199  0.79382477
## 4  0.3842836 1.0995520 -0.4421054  0.15666553
## 5 -3.6094489 0.6935377  0.4998015 -0.39342915
## 6 -3.9868310 0.8667640  0.2054317 -0.03320378
raw.p <- cbind(z.scores, ind$coord[,1:4])

Post PCA tests

In order to compare dimensions 1-4 between manual and digital measurements, I need to actually run a PCA on digital only and manual only data sets, then perform a paired t-test.

Let’s create the data sets

dig_p_long <- z.scores %>%
  select(ID, SPP, ends_with(".dig"))


man_p_long <- z.scores %>%
  select(ID, SPP, ends_with(".man"))

Now let’s run the individual PCAs

Digital PCA

PCA_dig <- prcomp(dig_p_long[, 3:14])

summary(PCA_dig)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.1141 0.97826 0.74710 0.56729 0.41175 0.34804 0.25763
## Proportion of Variance 0.8081 0.07975 0.04651 0.02682 0.01413 0.01009 0.00553
## Cumulative Proportion  0.8081 0.88787 0.93438 0.96120 0.97533 0.98542 0.99096
##                            PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.21796 0.19369 0.11394 0.07637 0.06855
## Proportion of Variance 0.00396 0.00313 0.00108 0.00049 0.00039
## Cumulative Proportion  0.99491 0.99804 0.99912 0.99961 1.00000
(loadings_dig <- PCA_dig$rotation[, 1:5])
##                  PC1         PC2          PC3         PC4         PC5
## SL.dig    -0.3169909 -0.10611458  0.060604567 -0.02354075  0.18176533
## BD1.dig   -0.3049862  0.01844403  0.250670038 -0.08937686 -0.10696578
## BD2.dig   -0.3063687 -0.22926027  0.015047023 -0.21325192  0.09067490
## CPD.dig   -0.3113160  0.02702292  0.134490705  0.06974432  0.10056312
## CPL.dig   -0.3033025 -0.12560059  0.020502291 -0.01630232  0.47718294
## PreDL.dig -0.2422550 -0.64531966  0.171752342 -0.09719333  0.02497410
## DbL.dig   -0.2321796  0.50569082  0.572575906  0.32709467  0.04365808
## HL.dig    -0.2625901  0.30850000 -0.607101208 -0.21853627  0.16435216
## HD1.dig   -0.3120389  0.17140191  0.004180317 -0.07011570 -0.05451678
## HD2.dig   -0.2995263  0.29442543 -0.204803116 -0.16644442  0.01982239
## SnL.dig   -0.2592646 -0.18806057 -0.382995034  0.84022275 -0.14244593
## OL.dig    -0.2964414 -0.02385058 -0.014007596 -0.19509333 -0.81011359
dig.sorted.loadings.1 <- loadings_dig[order(loadings_dig[, 1]), 1]
dig.myTitle <- "Loadings Plot for PC1" 
dig.myXlab  <- "Variable Loadings"
dotchart(dig.sorted.loadings.1, main=dig.myTitle, xlab=dig.myXlab, cex=1.5, col="red")

dig.sorted.loadings.2 <- loadings_dig[order(loadings_dig[, 2]), 2]
dig.myTitle <- "Loadings Plot for PC2" 
dig.myXlab  <- "Variable Loadings"
dotchart(dig.sorted.loadings.2, main=dig.myTitle, xlab=dig.myXlab, cex=1.5, col="red")

dig.sorted.loadings.3 <- loadings_dig[order(loadings_dig[, 3]), 3]
dig.myTitle <- "Loadings Plot for PC3" 
dig.myXlab  <- "Variable Loadings"
dotchart(dig.sorted.loadings.3, main=dig.myTitle, xlab=dig.myXlab, cex=1.5, col="red")

dig.VM_PCA <- varimax(PCA$rotation[, 1:3]) 

dig.VM_PCA
## $loadings
## 
## Loadings:
##           PC1    PC2    PC3   
## SL.dig    -0.116 -0.220       
## BD1.dig   -0.235 -0.112       
## BD2.dig          -0.302       
## CPD.dig   -0.190 -0.120       
## CPL.dig          -0.231       
## PreDL.dig        -0.493  0.376
## DbL.dig   -0.469  0.276       
## HL.dig     0.168        -0.698
## HD1.dig   -0.142        -0.248
## HD2.dig                 -0.449
## SnL.dig          -0.267 -0.231
## OL.dig           -0.167 -0.147
## SL.man    -0.151 -0.197       
## BD1.man   -0.303              
## CPD.man   -0.227 -0.101       
## CPL.man   -0.118 -0.203       
## PreDL.man        -0.354       
## DbL.man   -0.505  0.315       
## HL.man    -0.190 -0.104       
## HD1.man   -0.270              
## SnL.man   -0.115 -0.114       
## OL.man    -0.197 -0.102       
## 
##                  PC1   PC2   PC3
## SS loadings    1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
## 
## $rotmat
##            [,1]        [,2]       [,3]
## [1,]  0.6917396  0.60222957  0.3985170
## [2,] -0.5560565  0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105  0.8857027
(dig.eig.val <- get_eigenvalue(PCA_dig))
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1  9.697460424      80.81217020                    80.81217
## Dim.2  0.956988897       7.97490747                    88.78708
## Dim.3  0.558155419       4.65129516                    93.43837
## Dim.4  0.321819658       2.68183049                    96.12020
## Dim.5  0.169536257       1.41280214                    97.53301
## Dim.6  0.121130526       1.00942105                    98.54243
## Dim.7  0.066371341       0.55309451                    99.09552
## Dim.8  0.047504771       0.39587309                    99.49139
## Dim.9  0.037517676       0.31264730                    99.80404
## Dim.10 0.012983044       0.10819203                    99.91223
## Dim.11 0.005832920       0.04860766                    99.96084
## Dim.12 0.004699067       0.03915889                   100.00000
dig.ind <- get_pca_ind(PCA_dig)
head(dig.ind$coord[,1:4])
##       Dim.1      Dim.2       Dim.3     Dim.4
## 1 -3.877405 -0.1360907  0.21810368 0.6037004
## 2 -4.350838  1.0993666  0.01943977 0.6087639
## 3 -6.089192  0.1481213  0.42598106 0.5018142
## 4  0.215744  0.9684140 -0.13146684 0.9619029
## 5 -2.633685  0.2573236  0.58017122 0.2108573
## 6 -3.078674  0.3356335  0.19729525 0.6931265
dig.p <- cbind(dig_p_long, dig.ind$coord[,1:4])

Manual PCA

PCA_man <- prcomp(man_p_long[, 3:12])

summary(PCA_man)
## Importance of components:
##                           PC1     PC2     PC3    PC4     PC5     PC6     PC7
## Standard deviation     2.8377 0.80736 0.67359 0.5225 0.42289 0.34999 0.30348
## Proportion of Variance 0.8052 0.06518 0.04537 0.0273 0.01788 0.01225 0.00921
## Cumulative Proportion  0.8052 0.87042 0.91580 0.9431 0.96098 0.97323 0.98244
##                            PC8     PC9    PC10
## Standard deviation     0.28419 0.27405 0.14043
## Proportion of Variance 0.00808 0.00751 0.00197
## Cumulative Proportion  0.99052 0.99803 1.00000
(loadings_man <- PCA_man$rotation[, 1:5])
##                  PC1          PC2          PC3         PC4         PC5
## SL.man    -0.3448760 -0.159264090  0.028818596  0.08348653  0.17222913
## BD1.man   -0.3336431  0.171772379 -0.041810129 -0.02260661  0.23746504
## CPD.man   -0.3381457  0.020375133 -0.022859381  0.02968760  0.21184581
## CPL.man   -0.3287681 -0.213919604 -0.004339041  0.15099603  0.37304666
## PreDL.man -0.3128472 -0.481495839  0.092930271  0.10276539  0.13586031
## DbL.man   -0.2603841  0.806413535 -0.039923819  0.07691051  0.12592852
## HL.man    -0.3152071  0.005916009  0.329640553  0.42089544 -0.72541972
## HD1.man   -0.3367866  0.089004745  0.107331634  0.10025382 -0.06396107
## SnL.man   -0.2773374 -0.093007562 -0.874263032 -0.13518487 -0.35690086
## OL.man    -0.3032190 -0.014288942  0.319579808 -0.86422398 -0.19464322
man.sorted.loadings.1 <- loadings_man[order(loadings_man[, 1]), 1]
man.myTitle <- "Loadings Plot for PC1" 
man.myXlab  <- "Variable Loadings"
dotchart(man.sorted.loadings.1, main=man.myTitle, xlab=man.myXlab, cex=1.5, col="red")

man.sorted.loadings.2 <- loadings_man[order(loadings_man[, 2]), 2]
man.myTitle <- "Loadings Plot for PC2" 
man.myXlab  <- "Variable Loadings"
dotchart(man.sorted.loadings.2, main=man.myTitle, xlab=man.myXlab, cex=1.5, col="red")

man.sorted.loadings.3 <- loadings_man[order(loadings_man[, 3]), 3]
man.myTitle <- "Loadings Plot for PC3" 
man.myXlab  <- "Variable Loadings"
dotchart(man.sorted.loadings.3, main=man.myTitle, xlab=man.myXlab, cex=1.5, col="red")

man.VM_PCA <- varimax(PCA$rotation[, 1:3]) 

man.VM_PCA
## $loadings
## 
## Loadings:
##           PC1    PC2    PC3   
## SL.dig    -0.116 -0.220       
## BD1.dig   -0.235 -0.112       
## BD2.dig          -0.302       
## CPD.dig   -0.190 -0.120       
## CPL.dig          -0.231       
## PreDL.dig        -0.493  0.376
## DbL.dig   -0.469  0.276       
## HL.dig     0.168        -0.698
## HD1.dig   -0.142        -0.248
## HD2.dig                 -0.449
## SnL.dig          -0.267 -0.231
## OL.dig           -0.167 -0.147
## SL.man    -0.151 -0.197       
## BD1.man   -0.303              
## CPD.man   -0.227 -0.101       
## CPL.man   -0.118 -0.203       
## PreDL.man        -0.354       
## DbL.man   -0.505  0.315       
## HL.man    -0.190 -0.104       
## HD1.man   -0.270              
## SnL.man   -0.115 -0.114       
## OL.man    -0.197 -0.102       
## 
##                  PC1   PC2   PC3
## SS loadings    1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
## 
## $rotmat
##            [,1]        [,2]       [,3]
## [1,]  0.6917396  0.60222957  0.3985170
## [2,] -0.5560565  0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105  0.8857027
library(factoextra)

(man.eig.val <- get_eigenvalue(PCA_man))
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  8.05239960       80.5239960                    80.52400
## Dim.2  0.65182332        6.5182332                    87.04223
## Dim.3  0.45372921        4.5372921                    91.57952
## Dim.4  0.27302458        2.7302458                    94.30977
## Dim.5  0.17883881        1.7883881                    96.09816
## Dim.6  0.12249607        1.2249607                    97.32312
## Dim.7  0.09210306        0.9210306                    98.24415
## Dim.8  0.08076290        0.8076290                    99.05178
## Dim.9  0.07510089        0.7510089                    99.80278
## Dim.10 0.01972155        0.1972155                   100.00000
man.ind <- get_pca_ind(PCA_man)
head(man.ind$coord[,1:4])
##        Dim.1     Dim.2        Dim.3      Dim.4
## 1 -3.7784207 0.6451182 -0.396593632 0.06849222
## 2 -4.1217187 0.6603292  0.008902533 0.87860206
## 3 -6.4466569 0.1376030 -0.378551677 0.95660998
## 4  0.3309617 0.6450504  0.446021000 1.18793061
## 5 -2.4644018 0.5773132  0.605969577 0.29865214
## 6 -2.5445684 0.9381672  0.511011644 0.61730956
man.p <- cbind(man_p_long, man.ind$coord[,1:4])

Now let’s do the t-tests

T-tests

(pc1 <- t.test(dig.p$Dim.1, man.p$Dim.1, paired= TRUE, alternative = "two.sided"))
## 
##  Paired t-test
## 
## data:  dig.p$Dim.1 and man.p$Dim.1
## t = 7.2114e-16, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.08315722  0.08315722
## sample estimates:
## mean difference 
##    3.048687e-17
(pc2 <- t.test(dig.p$Dim.2, man.p$Dim.2, paired= TRUE, alternative = "two.sided"))
## 
##  Paired t-test
## 
## data:  dig.p$Dim.2 and man.p$Dim.2
## t = -2.1474e-15, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.07233049  0.07233049
## sample estimates:
## mean difference 
##   -7.896446e-17
(pc3 <- t.test(dig.p$Dim.3, man.p$Dim.3, paired= TRUE, alternative = "two.sided"))
## 
##  Paired t-test
## 
## data:  dig.p$Dim.3 and man.p$Dim.3
## t = 4.112e-17, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1053799  0.1053799
## sample estimates:
## mean difference 
##    2.202945e-18

Plots

PCA 1v2

library(AMR) 
library(ggplot2)
library(ggfortify)
library(ggbiplot)

wes.yel <- "#E1AF00"
wes.blu <- "#78B7C5"

plot1<- autoplot(PCA, data = z.scores, colour="SPP", loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+scale_color_manual(values=c("#E1AF00","#78B7C5", "#856A9C"))+ scale_fill_manual(values=c("#E1AF00","#78B7C5", "#856A9C"))+ ggtitle("PCA Plot of Morphology traits") + theme_classic() 



#ggsave(plot1, filename = "pc1v2.png", bg="transparent", width = 180, height = 120, units = "mm", dpi = 300)

PCA 2v3

plot5<- autoplot(PCA, x=2, y=3, data = z.scores, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+scale_color_manual(values=c("#E1AF00","#78B7C5", "#856A9C"))+ scale_fill_manual(values=c("#E1AF00","#78B7C5", "#856A9C"))+ ggtitle("PCA Plot of Morphology traits") + theme_classic()
plot5

#ggsave(plot5, filename = "pc2v3.png", bg="transparent", width = 180, height = 120, units = "mm", dpi = 300)

By method

If I want to color by method, I think I need to combine my individual PCAs.

#data frame with just BD1 and HD1 for dig
dig_p_long2 <- z.scores %>%
  select(ID, SPP, ends_with(".dig"))
dig_p_long2 <- subset(dig_p_long2, select = -c(BD2.dig, HD2.dig))

#data frame with just BD2 and HD2 for dig
dig_p_long3 <- z.scores %>%
  select(ID, SPP, ends_with(".dig"))
dig_p_long3 <- subset(dig_p_long3, select = -c(BD1.dig, HD1.dig))

PCA_dig <- prcomp(dig_p_long2[, 3:12])

#PC scores for digital (BD1/HD1)
PCA.scores.dig <- as.data.frame(PCA_dig$x)
PCA.scores.dig$method <- "dig"
PCA.scores.dig$ID <- dig_p_long$ID
PCA.scores.dig$SPP <- dig_p_long$SPP

PCA_dig2 <- prcomp(dig_p_long3[, 3:12])

#PC scores for digital (BD1/HD1)
PCA.scores.dig2 <- as.data.frame(PCA_dig2$x)
PCA.scores.dig2$method <- "dig"
PCA.scores.dig2$ID <- dig_p_long2$ID
PCA.scores.dig2$SPP <- dig_p_long2$SPP

#PC scores for manual
PCA.scores.man <- as.data.frame(PCA_man$x)
PCA.scores.man$method <- "man"
PCA.scores.man$ID <- man_p_long$ID
PCA.scores.man$SPP <- man_p_long$SPP

PCA.comb <- rbind(PCA.scores.dig, PCA.scores.man)
PCA.comb2 <- rbind(PCA.scores.dig2, PCA.scores.man)

ggplot(data = PCA.comb, aes(x = PC1, y = PC2, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD1)",
    x = "Principal Component 1",
    y = "Principal Component 2",
    color = "Method"
  ) +
  theme_minimal()

ggplot(data = PCA.comb, aes(x = PC2, y = PC3, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD1)",
    x = "Principal Component 1",
    y = "Principal Component 2",
    color = "Method"
  ) +
  theme_minimal()

ggplot(data = PCA.comb2, aes(x = PC1, y = PC2, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD2)",
    x = "Principal Component 1",
    y = "Principal Component 2",
    color = "Method"
  ) +
  theme_minimal()

ggplot(data = PCA.comb2, aes(x = PC2, y = PC3, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD2)",
    x = "Principal Component 1",
    y = "Principal Component 2",
    color = "Method"
  ) +
  theme_minimal()